********************************************************************************
*	PROJECT: Childhood confidence and long-term outcomes (PSID)
*	PURPOSE: Build analysis datasets from cleaned CDS, TAS, and PSID data 
*	PUBLISHED: August 2022
*	CONTACT: Hannah Ruebeck, hruebeck@mit.edu
********************************************************************************
* Session setup: start from an empty session and configure Stata before any data is loaded
clear all
set more off
* raise the variable limit before loading the merged panel
set maxvar 10000
* allow -pause- statements to take effect (none appear in this part of the file)
pause on


** Paths;
* root folder of the replication package; edit this one line to relocate everything below
local path "/Users/XXXXX/Downloads/replication materials"

* sub-folders, all derived from `path'
local DO "`path'/code"
local CLEAN "`path'/clean"
local OUT "`path'/out/tables"

local RAW "`path'/raw"

/*
Description: This code merges together the cleaned CDS, TAS, and PSID into the 
main panel dataset that will be used for analysis.   
*/

********************************************************************************
// PROGRAMS

program get_pval, rclass
	* Purpose: turn a coefficient and its standard error into LaTeX significance stars.
	*
	* Options:
	*   b(string)   coefficient (must evaluate to a number)
	*   se(string)  standard error; surrounding parentheses, e.g. "(0.12)", are stripped
	*   df(string)  degrees of freedom -> two-sided p-value from the t distribution
	*   normal      two-sided p-value from the standard normal distribution
	* df() and normal are mutually exclusive. If neither is given no p-value is
	* computed and r(star) is empty.
	*
	* Returns:
	*   r(star)     "" (p>.1), "$^{*}$" (p<=.1), "$^{**}$" (p<=.05) or "$^{***}$" (p<=.01)
    syntax, b(string) se(string) [df(string) normal]
	
	if "`df'"!="" & "`normal'"!="" {
	    di as error "df() is only specified for a t-distribution and cannot be jointly specified with normal"
		* -error- needs an explicit return code; 198 = invalid syntax
		error 198
	}
	
	* drop the parentheses that table code usually wraps around standard errors
    local se = subinstr("`se'", "(", "", .)
    local se = subinstr("`se'", ")", "", .)

	* two-sided p-value under the requested reference distribution
	if "`df'"!="" 		local p = 2*ttail(`df',abs(`b'/`se'))
	if "`normal'"!="" 	local p = 2*(1-normal(abs(`b'/`se')))

	* one shared star ladder; a missing p-value (e.g. se of 0) fails every <= test
	* and therefore yields no stars
	local star = ""
	if "`p'"!="" {
		if `p' >.05 & `p' <=.1   local star = "$^{*}$"
		if `p' >.01 & `p' <=.05  local star = "$^{**}$"
		if `p' <=.01  		 local star = "$^{***}$"
	}
    return local star = "`star'"

end 



program classify_2000
	* Builds 0/1 occupation-group indicators from a 2000-scheme occupation code.
	*   var(string)   name of the variable holding the occupation code
	*   pref(string)  optional prefix for the generated variable names
	* Creates `pref'_2000_occ_stem, _health, _othhiskll and _stem_health.
	* All four are missing wherever `var' is missing.
	syntax,  var(string) [pref(string)]

	* code sets for each group, written once as logical expressions on `var'
	local is_stem   inlist(`var', 30, 70) | inrange(`var', 100, 156) | inrange(`var', 160, 176) | inrange(`var', 191, 194)
	local is_health inrange(`var', 300, 351) | inlist(`var', 353) | inrange(`var', 360, 365)
	local hi_a      inrange(`var', 1, 14) | inlist(`var', 23, 35, 36, 42, 62, 71, 281, 283, 284, 285, 536)
	local hi_b      inrange(`var', 80, 95) | inrange(`var', 180, 186) | inrange(`var', 200, 202) | inrange(`var', 210, 254)

	gen `pref'_2000_occ_stem      = (`is_stem')         if !mi(`var')
	gen `pref'_2000_occ_health    = (`is_health')       if !mi(`var')
	gen `pref'_2000_occ_othhiskll = (`hi_a' | `hi_b')   if !mi(`var')

	* in either STEM or health
	egen `pref'_2000_occ_stem_health = rowmax(`pref'_2000_occ_stem `pref'_2000_occ_health) if !mi(`var')
end


program classify_2010
	* Builds 0/1 occupation-group indicators from a 2010-scheme occupation code.
	*   var(string)   name of the variable holding the occupation code
	*   pref(string)  optional prefix for the generated variable names
	* Creates `pref'_2010_occ_stem, _health, _othhiskll and _stem_health.
	* All four are missing wherever `var' is missing.
	syntax,  var(string) [pref(string)]

	* code sets for each group, written once as logical expressions on `var'
	local is_stem   inlist(`var', 300, 700) | inrange(`var', 1000, 1560) | inrange(`var', 1600, 1760) | inrange(`var', 1910, 1940)
	local is_health inrange(`var', 3000, 3510) | inlist(`var', 3535) | inrange(`var', 3600, 3649)
	local hi_a      inrange(`var', 10, 140) | inlist(`var', 230, 350, 360, 420, 710, 2810, 2825, 2830, 2840, 2850, 5360)
	local hi_b      inrange(`var', 630, 650) | inrange(`var', 800, 950) | inrange(`var', 1800, 1860) | inrange(`var', 2000, 2025) | inrange(`var', 2100, 2540)

	gen `pref'_2010_occ_stem      = (`is_stem')         if !mi(`var')
	gen `pref'_2010_occ_health    = (`is_health')       if !mi(`var')
	gen `pref'_2010_occ_othhiskll = (`hi_a' | `hi_b')   if !mi(`var')

	* in either STEM or health
	egen `pref'_2010_occ_stem_health = rowmax(`pref'_2010_occ_stem `pref'_2010_occ_health) if !mi(`var')

end

program classify_1970
	* Builds 0/1 occupation-group indicators from a 1970-scheme occupation code.
	*   var(string)   name of the variable holding the occupation code
	*   pref(string)  optional prefix for the generated variable names
	* Creates `pref'_1970_occ_stem, _health, _othhiskll and _stem_health.
	* All four are missing wherever `var' is missing.
	syntax,  var(string) [pref(string)]

	* code sets for each group, written once as logical expressions on `var'
	local is_stem   inrange(`var', 2, 21) | `var'==23 | inrange(`var', 34, 54) | inrange(`var', 150, 162) | inlist(`var', 25, 55)
	local is_health inrange(`var', 61, 76) | inrange(`var', 80, 85) | `var'==426 | inrange(`var', 921, 926)
	local hi_a      inlist(`var', 1, 56, 100, 174, 181, 184, 192, 202, 210, 212, 240, 270, 330, 363, 382, 954)
	local hi_b      inrange(`var', 30, 33) | inrange(`var', 231, 235) | inrange(`var', 91, 96) | inrange(`var', 102, 145)

	gen `pref'_1970_occ_stem      = (`is_stem')          if !mi(`var')
	gen `pref'_1970_occ_health    = (`is_health')        if !mi(`var')
	gen `pref'_1970_occ_othhiskll = (`hi_a' | (`hi_b'))  if !mi(`var')

	* in either STEM or health
	egen `pref'_1970_occ_stem_health = rowmax(`pref'_1970_occ_stem `pref'_1970_occ_health) if !mi(`var')
end


********************************************************************************
// MERGE TOGETHER ALL OF THE DATA
* Builds the person-year panel (keys: uid, year) by attaching, in order, the
* stratum file, the CDS file, the TAS file and the household file.

* start with PSID individual level dataset from 1997-2019
use "`CLEAN'/master_psid_indiv.dta", clear
merge m:1 uid using "`CLEAN'/stratum.dta" 
* every person in the individual file must have a stratum record ...
assert _merge!=1
* ... and stratum records for people not in the individual file are discarded
drop if _merge==2
drop _merge

* merge in CDS which is in 1997, 2002/3, and 2007 -- calling 2002 2003 for merging purposes
merge 1:1 uid year using "`CLEAN'/master_cds.dta"
* CDS person-years that have no matching row in the individual file
gen missing_indiv_file_2003 = _merge==2
* person-year appears in the CDS file (matched or CDS-only)
gen in_cds_year = inlist(_merge, 2, 3)
drop _merge
 
* merge in TAS which is in 2005-2019, odd years
merge 1:1 uid year using "`CLEAN'/master_tas.dta"
* only matched rows are flagged here (TAS-only rows, _merge==2, get 0)
gen in_tas_year = _merge==3
drop _merge
   
* merge in the PSID household level data
* rows that came only from the CDS file take their household key from cds_int_num_2002
replace hh_interviewnum = cds_int_num_2002 if missing_indiv_file_2003==1 
merge m:1 hh_interviewnum year using "`CLEAN'/master_psid_hh.dta"
* the only person-years allowed to lack a household record are those dated 1996
assert year==1996 if _merge==1
drop _merge
  
********************************************************************************
// IDENTIFY THE CDS/TAS SAMPLE

* person-level flags: was this uid ever observed in a CDS / TAS year?
foreach src in cds tas {
	bys uid: egen in_`src'_ever = max(in_`src'_year)
}

* restrict to people ever in the CDS, and discard the 1996 person-years
keep if in_cds_ever==1 & year!=1996

* WEIGHTS
* spread each CDS weight to every row of the person and drop the dem_ original
foreach wt in ch97prwt ch02prwt ch07prwt {
	bys uid: egen `wt' = max(dem_`wt')
	drop dem_`wt'
}

* FILL IN CLUSTER INFO TO THOSE MISSING 2003 INDIVIDUAL FILE
* the person-level maximum is copied onto every row of that person
foreach design in sampling_stratum sampling_cluster {
	bys uid: ereplace `design' = max(`design')
}
  
********************************************************************************
// IDENTIFY OCCUPATION CATEGORIES

* Each source occupation variable is paired, position by position, with the
* prefix its indicators should carry: TAS job 1, TAS job 2, household head, wife.
local who_list ta1 ta2 hh_hd hh_wf
local occ2000  ta_e20mainoccforjob1_2000code ta_e20mainoccforjob2_2000code hh_hd_occupationcode2000 hh_wf_occupationcode2000
local occ2010  ta_d26mainocc_2010codejob1 ta_d26mainocc_2010codejob2 hh_hd_occupationcode2010 hh_wf_occupationcode2010

* 2000-scheme codes
forval k = 1/4 {
	local who : word `k' of `who_list'
	local src : word `k' of `occ2000'
	classify_2000, var(`src') pref(`who')
}

* 2010-scheme codes
forval k = 1/4 {
	local who : word `k' of `who_list'
	local src : word `k' of `occ2010'
	classify_2010, var(`src') pref(`who')
}

* 1970-scheme codes exist only for the household head and wife
foreach who in hh_hd hh_wf {
	classify_1970, var(`who'_occupationcode1970) pref(`who')
}

* Collapse the scheme-specific indicators into one per adult and group:
*   year < 2003 -> 1970 scheme (head/wife only)
*   year < 2017 -> 2000 scheme
*   otherwise   -> 2010 scheme
foreach adult in hh_hd hh_wf ta1 ta2 {
	local is_hh = inlist("`adult'", "hh_hd", "hh_wf")
	foreach type in stem health othhiskll stem_health {
		local v1970 `adult'_1970_occ_`type'
		local v2000 `adult'_2000_occ_`type'
		local v2010 `adult'_2010_occ_`type'

		if `is_hh' {
			gen `adult'_occ_`type' = cond(year<2003, `v1970', cond(year<2017, `v2000', `v2010'))
			drop `v1970' `v2000' `v2010'
		}
		else {
			gen `adult'_occ_`type' = cond(year<2017, `v2000', `v2010')
			drop `v2000' `v2010'
		}
	}
}

* TAS respondent: in the group if either reported job is
foreach type in stem health othhiskll stem_health {
	egen ta_occ_`type' = rowmax(ta1_occ_`type' ta2_occ_`type')
	drop ta1_occ_`type' ta2_occ_`type'
}
 
********************************************************************************
// CLEAN FAMILY VARIABLES 

* family structure and mom/dad figure unemployment, age

* For people with a CDS-only row lacking a birth year, carry the birth year
* forward from that person's previous (earlier-year) row.
tempvar needs_fill any_fill
gen `needs_fill' = missing_indiv_file_2003==1 & mi(pi_birthyear)
bys uid: egen `any_fill' = max(`needs_fill')
bysort uid (year): replace pi_birthyear = pi_birthyear[_n-1] if `any_fill'==1 & mi(pi_birthyear) & !mi(pi_birthyear[_n-1])
drop `needs_fill' `any_fill'

gen age = year - pi_birthyear
gen under18 = (age<=18)
gen ishead = (pi_reltohead==10)
gen iswife = inlist(pi_reltohead, 20, 22)
gen isheadorwife = (ishead | iswife)
gen twoadulthh = hh_haswifeinterview

* parent control vars
* Which household record describes each parent: the head is the dad when the
* head is male, otherwise the wife record is (if a wife interview exists);
* the reverse holds for the mom.
local hd_is_dad hh_hd_male==1
local wf_is_dad hh_hd_male==0 & hh_haswifeinterview==1
local hd_is_mom hh_hd_male==0
local wf_is_mom hh_hd_male==1 & hh_haswifeinterview==1

* prior-year unemployment of each parent, while the child is 18 or younger
foreach par in dad mom {
	gen unemp_`par' = hh_hd_unemployed_pryr if under18==1 & `hd_is_`par''
	replace unemp_`par' = hh_wf_unemployed_pryr if under18==1 & `wf_is_`par''
}

* parent's age minus child's age, taken from the 1997 row
foreach par in dad mom {
	gen agewhenborn_`par' = hh_hd_age-age if year==1997 & `hd_is_`par''
	replace agewhenborn_`par' = hh_wf_age-age if year==1997 & `wf_is_`par''
}

* parental education indicators
foreach var in atleast_grad_hs atleast_grad_hs_ged atleast_grad_bach atleast_grad_postbach {
	foreach par in dad mom {
		gen `var'_`par' = hh_hd_`var' if under18==1 & `hd_is_`par''
		replace `var'_`par' = hh_wf_`var' if under18==1 & `wf_is_`par''
	}
}

* parental occupation-group indicators
foreach type in stem stem_health health othhiskll {
	foreach par in dad mom {
		gen occ_`type'_`par' = hh_hd_occ_`type' if under18==1 & `hd_is_`par''
		replace occ_`type'_`par' = hh_wf_occ_`type' if under18==1 & `wf_is_`par''
	}
}

* fill in child race from parent race where needed -- there are 7 cases; head and wife
* race agree in all of them unless one is missing, except one case where hd is other
* and wf is white, which is coded as other

* white
replace chld_white = 1 if hh_hd_race1==1 & mi(chld_white) & year==1997
foreach oth in hisp black {
	replace chld_`oth' = 0 if chld_white==1 & mi(chld_`oth') & year==1997
}

* black (head first, then wife)
foreach adult in hd wf {
	replace chld_black = 1 if hh_`adult'_race1==2 & mi(chld_black) & year==1997
}
foreach oth in hisp white {
	replace chld_`oth' = 0 if chld_black==1 & mi(chld_`oth') & year==1997
}

* hispanic
replace chld_hisp = 1 if hh_hd_race1==5 & mi(chld_hisp) & year==1997
foreach oth in white black {
	replace chld_`oth' = 0 if chld_hisp==1 & mi(chld_`oth') & year==1997
}

* other
foreach grp in hisp white black {
	replace chld_`grp' = 0 if hh_hd_race1==7 & mi(chld_`grp') & year==1997
}
 
********************************************************************************
// CLEAN SELF OUTCOMES FOR EMPLOYMENT, INCOME, MARRIAGE
* For every outcome the source depends on the person's role in the household:
* head -> hh_hd_* record, wife -> hh_wf_* record, anyone else -> TAS (ta_*).

* prior-year unemployment at age 16+
gen unemp_self = ta_unemployed_pryr if age>=16 & ishead==0 & iswife==0
replace unemp_self = hh_hd_unemployed_pryr if age>=16 & ishead==1
replace unemp_self = hh_wf_unemployed_pryr if age>=16 & iswife==1

* own education indicators at age 16+, plus "ever attained" over all of a person's rows
foreach var in atleast_grad_hs atleast_grad_hs_ged atleast_grad_bach atleast_grad_postbach {
    gen `var'_self = ta_`var' if age>=16 & ishead==0 & iswife==0
    replace `var'_self = hh_hd_`var' if age>=16 & ishead==1
    replace `var'_self = hh_wf_`var' if age>=16 & iswife==1
    
    bys uid: egen ever_`var'_self = max(`var'_self)
    
}
  
* marital/cohabitation status from the household file (heads and wives only).
* The married / divorced / cohabiting indicators below read codes 1 / 2 / 3;
* code 4 is assigned but has no indicator of its own in this section.
* Codes 3 and 4 only fill rows not already coded 1 or 2.
gen psid_marital_cohabitationstatus = 1 if ishead==1 & inlist(hh_headmaritalstatus, 1, 3)
replace psid_marital_cohabitationstatus = 1 if iswife==1 & hh_headcouplestatus ==1
replace psid_marital_cohabitationstatus = 2 if ishead==1 & inlist(hh_headmaritalstatus, 4, 5)
replace psid_marital_cohabitationstatus = 3 if (ishead==1 | iswife==1) & inlist(hh_headcouplestatus, 2, 3, 4) & mi(psid_marital_cohabitationstatus)
replace psid_marital_cohabitationstatus = 4 if ishead==1 & hh_headmaritalstatus==2 & mi(psid_marital_cohabitationstatus)

gen psid_married = psid_marital_cohabitationstatus==1 if !mi(psid_marital_cohabitationstatus)
gen psid_divorced = psid_marital_cohabitationstatus==2 if !mi(psid_marital_cohabitationstatus)
gen psid_cohabitating = psid_marital_cohabitationstatus==3 if !mi(psid_marital_cohabitationstatus)

* "ever" versions over all of a person's rows; ever divorced implies ever married,
* ever married implies ever married-or-cohabiting
bys uid: egen psid_evermarried = max(psid_married)
bys uid: egen psid_everdivorced = max(psid_divorced)
replace psid_evermarried = 1 if psid_everdivorced==1
* full variable name used on purpose: the abbreviation psid_cohabit only works
* with varabbrev on and while no other variable shares that stem
bys uid: egen psid_evermarriedcohabit = max(psid_cohabitating)
replace psid_evermarriedcohabit = 1 if psid_evermarried==1

* combine with the TAS reports: spread each TAS flag over the person's rows,
* then take the larger of the PSID-based and TAS-based flag
foreach var in evermarried everdivorced evermarriedcohabit {
    rename ta_`var' temp 
    bys uid: egen ta_`var' = max(temp)
    drop temp
    
    egen `var' = rowmax(psid_`var' ta_`var')
}

gen evermarried_neverdiv = evermarried - everdivorced

* number of children: only meaningful for heads/wives, where missing is read as 0
replace pi_numkids = . if isheadorwife==0
replace pi_numkids = 0 if mi(pi_numkids) & isheadorwife==1
bys uid: egen psid_max_num_child = max(pi_numkids)
gen max_num_child = psid_max_num_child

* earnings (2016 dollars, per the variable names); log is missing for zero earnings
egen ta_earnings = rowmax(ta_earningsfromworklastyear2016d ta_f1hm_earnlastyear2016d)
gen earnings_self = hh_hd_laborinc_pryr2016d if ishead==1
replace earnings_self = hh_wf_laborinc_pryr2016d if iswife==1
replace earnings_self = ta_earnings if isheadorwife==0
gen ln_earnings_self = ln(earnings_self)


* own occupation group
foreach type in stem stem_health health othhiskll {

gen occ_`type'_self = hh_hd_occ_`type' if ishead==1
replace occ_`type'_self = hh_wf_occ_`type' if iswife==1
replace occ_`type'_self = ta_occ_`type' if isheadorwife==0
} 

* urban/rural
gen hh_urban_bigcentral = hh_rural_urban==1 if !mi(hh_rural_urban)
gen hh_urban_bigplusfringe = inlist(hh_rural_urban, 1, 2) if !mi(hh_rural_urban)

* own residence: defined only when the person is a head or wife
foreach type in bigcentral bigplusfringe {
    gen urban_`type'_self = hh_urban_`type' if ishead==1 | iswife==1 
}

  
*************
* PARENT/TEACHER OVER/UNDER BELIEFS ABOUT EXPECTED EDUCATION

* the post-bachelor flag is renamed "grad" and blanked for people who never got a bachelor's
rename ever_atleast_grad_postbach_self ever_atleast_grad_grad_self
replace ever_atleast_grad_grad_self = . if ever_atleast_grad_bach_self==0

* highest level ever attained: 4 = graduate, 3 = bachelor's, 2 = HS or GED, 1 = neither
local hs   ever_atleast_grad_hs_self
local ged  ever_atleast_grad_hs_ged_self
local bach ever_atleast_grad_bach_self
local grad ever_atleast_grad_grad_self

gen final_educ = 4 if `grad'==1
replace final_educ = 3 if `bach'==1 & `grad'==0
replace final_educ = 2 if (`hs'==1 | `ged'==1) & `bach'==0
replace final_educ = 1 if `hs'==0 & `ged'==0

* one indicator per attained level (missing when final_educ is missing)
local lvl = 4
foreach name in postbach bach hs_ged none {
	gen max_educ_`name' = final_educ==`lvl' if !mi(final_educ)
	local --lvl
}

* parent's and teacher's expected education, recoded onto the same 1-4 scale
foreach who in parent tch {
	if "`who'"=="parent" local src pcgc_expectededuc
	else                 local src emtch_expectededuc

	gen `who'_pred_final_educ = 4 if inlist(`src', 7, 8)
	replace `who'_pred_final_educ = 3 if inlist(`src', 6)
	replace `who'_pred_final_educ = 2 if inlist(`src', 2, 3, 4, 5)
	replace `who'_pred_final_educ = 1 if inlist(`src', 1)
}
	
********************************************************************************
// PREP VARIABLES TO MAKE CONFIDENCE VARIABLES AND OTHER AGE VARIABLES  
  
* child age at time confidence was measured
drop dem_birthmoyr pi_birthmoyr
rename pi_mode_birthmoyr mode_birthmoyr

* carry the birth month-year forward onto a person's later rows where it is missing
gen flag = mi(mode_birthmoyr)
bys uid: egen hasflag = max(flag)
* the [_n-1] fill below needs a defined within-person order; -bys ...: egen-
* does not guarantee one, so sort explicitly (same pattern as the birth-year fill)
sort uid year
replace mode_birthmoyr = mode_birthmoyr[_n-1] if hasflag==1 & mi(mode_birthmoyr) & !mi(mode_birthmoyr[_n-1]) & uid==uid[_n-1]
drop flag hasflag
 
* date confidence was measured: child interview date, else the PCG interview date
gen confidencemoyr = moyr_chiw if haschiw==1
replace confidencemoyr = moyr_pcgiw if mi(moyr_chiw) & haschiw==1 // 75% occur in the same month and 94% of ch interviews occur within a month of pcg interview
* month-year difference converted to years
gen age_meas_conf = (confidencemoyr - mode_birthmoyr)/12
 
* age buckets; ages below 2.75 get no bucket here (label 0 is used only for the teacher variable)
gen age_meas_conf_bucket = 1 if age_meas_conf>=2.75 & age_meas_conf<8
replace age_meas_conf_bucket = 2 if age_meas_conf>=8 & age_meas_conf<12
replace age_meas_conf_bucket = 3 if age_meas_conf>=12 & age_meas_conf<15
replace age_meas_conf_bucket = 4 if age_meas_conf>=15 & !mi(age_meas_conf)
label def agebucket 0 "[0, 2.75)" 1 "[2.75-8)" 2 "[8-12)" 3 "[12-15)" 4 "[15-19.5]"
label values age_meas_conf_bucket agebucket

* child age when the teacher rated ability: emtch interview date, else pstch interview date
gen tchiwmoyr = moyr_emtchiw 
replace tchiwmoyr = moyr_pstchiw if mi(tchiwmoyr)
gen age_tch_meas_ability = (tchiwmoyr - mode_birthmoyr)/12
gen age_tch_meas_ability_bucket = 0 if age_tch_meas_ability<2.75
replace age_tch_meas_ability_bucket = 1 if age_tch_meas_ability>=2.75 & age_tch_meas_ability<8
replace age_tch_meas_ability_bucket = 2 if age_tch_meas_ability>=8 & age_tch_meas_ability<12
replace age_tch_meas_ability_bucket = 3 if age_tch_meas_ability>=12 & age_tch_meas_ability<15
replace age_tch_meas_ability_bucket = 4 if age_tch_meas_ability>=15 & !mi(age_tch_meas_ability)
label values age_tch_meas_ability_bucket agebucket

********************************************************************************
// CREATE MAIN CONFIDENCE VARIABLES  
* For math and reading: compares the child's self-rated skill
* (chld_<subj>cnf_rateownskill, compared against values 1-7) with the test-score
* percentile and flags over- and under-confidence under several definitions,
* both per wave (no prefix) and on person-level averages ("ave_" prefix).

foreach subj in math read {
	* test-score variable used for each subject
	if "`subj'"=="math" local test "ap"
	if "`subj'"=="read" local test "total"
	
	* count number of available observations
	gen hasconfandscore_`subj' = !mi(chld_`subj'cnf_rateownskill) & !mi(chld_`subj'_`test'_pctile)
	bys uid: egen num_hasconfandscore_`subj' = total(hasconfandscore_`subj')
 
	* calculate averages (includes averages of one number for kids with just one observation)
	bys uid: egen chld_ave_`subj'cnf_rateownskill = mean(chld_`subj'cnf_rateownskill) if hasconfandscore_`subj'==1
	bys uid: egen chld_ave_`subj'_`test'_pctile = mean(chld_`subj'_`test'_pctile) if hasconfandscore_`subj'==1

	* indicators for percentile buckets, over/underconfidence:
	foreach pref in "" "ave_" {

		* indicators for pctile groups
		* "above" is >= the cutoff, except for the cutoff 50 (the only value whose
		* first digit is 5), which is strict so that above50 and below50 do not overlap
		foreach abovenum in 50 75 60 85 40 65 {
			if strpos("`abovenum'", "5")!=1 local geq ">="
			else local geq ">" 
			gen `subj'_`pref'pctileabove`abovenum' = chld_`pref'`subj'_`test'_pctile `geq' `abovenum' if !mi(chld_`pref'`subj'_`test'_pctile)
		}
		foreach belownum in 25 50 15 40 35 60 {
			local leq "<="
			gen `subj'_`pref'pctilebelow`belownum' = chld_`pref'`subj'_`test'_pctile `leq' `belownum' if !mi(chld_`pref'`subj'_`test'_pctile)
			
		}
		
		* raw confidence measure
		local var chld_`pref'`subj'cnf_rateownskill

		* over and under-confidence conditional on test scores: 5 versions
		* list = (u1 u2 o1 o2): percentile cutoffs for the two under-confidence
		* rules and the two over-confidence rules
		foreach type in orig mstrict1 mstrict2 lstrict1 lstrict2 {
			if "`type'"=="orig" local list 50 75 50 25
			if "`type'"=="mstrict1" local list 60 85 40 15
			if "`type'"=="mstrict2" local list 50 85 50 25
			if "`type'"=="lstrict1" local list 40 65 60 35
			if "`type'"=="lstrict2" local list 40 65 40 15
			
			local u1: word 1 of `list'
			local u2: word 2 of `list'
			local o1: word 3 of `list'
			local o2: word 4 of `list'
			
			* self-ratings that count as "high" for the two over-confidence rules
			if "`type'"=="lstrict2" {
				local olist1 "6, 7"
				local olist2 "5, 6, 7"
			}
			else {
				local olist1 "7"
				local olist2 "6, 7"
			}
			
			gen `subj'_`pref'overcnf_`type' = (inlist(`var', `olist1') & `subj'_`pref'pctilebelow`o1'==1) ///
											  | (inlist(`var', `olist2') & `subj'_`pref'pctilebelow`o2'==1) ///
										   if !mi(`var') & !mi(chld_`pref'`subj'_`test'_pctile)
			gen `subj'_`pref'undrcnf_`type' = (inrange(`var', 1, 3) & `subj'_`pref'pctileabove`u1' == 1) ///
										   | (inrange(`var', 1, 4) & `subj'_`pref'pctileabove`u2' == 1) ///
										   if !mi(`var') & !mi(chld_`pref'`subj'_`test'_pctile)			
		}
		
		* and the whiskers version (attempted more data-driven version of the above)
		* Within each (age bucket i, self-rating j) cell: overconfident if the score is
		* below the cell's 25th percentile, underconfident if above its 75th.
		*
		* NOTE: three fixes relative to the original loop, all of which can change
		* the *_whisk / *_whisk1 values:
		*   1. the two edge-case resets apply only to children in the (i, j) cell;
		*      before, they tested only the loop constant j and so hit every child
		*      in bucket i (the j==7 reset undid underconfidence flags set for ratings 1-6)
		*   2. children with a missing score are left missing (before, overcnf was
		*      set to 0 for them because missing >= p25 is true)
		*   3. an empty (i, j) cell is skipped instead of raising a syntax error
		*      from an undefined r(p25)
		* NOTE(review): for the "ave_" prefix the score used here is still the per-wave
		* chld_`subj'_`test'_pctile, not the averaged one - kept as in the original; confirm.
		
		local type whisk
			gen `subj'_`pref'overcnf_`type' = .
			gen `subj'_`pref'undrcnf_`type' = .
			local score chld_`subj'_`test'_pctile
			forval i = 1/4 {
				forval j = 1/7 {
					* children in this age bucket who gave this rating and have a score
					local cell age_meas_conf_bucket==`i' & `var'==`j' & !mi(`score')
					
					sum `score' if `cell', d
					if `r(N)'>0 {
						
						* most cases
						replace `subj'_`pref'overcnf_`type' = 1 if `score' < `r(p25)' & `cell'
						replace `subj'_`pref'overcnf_`type' = 0 if `score' >= `r(p25)' & `cell'
						replace `subj'_`pref'undrcnf_`type' = 0 if `score' <= `r(p75)' & `cell'
						replace `subj'_`pref'undrcnf_`type' = 1 if `score' > `r(p75)' & `cell'
						
						* edge cases
						
						// can't be overconfident if you scored really low and said 1
						if `j'==1 replace `subj'_`pref'overcnf_`type' = 0 if `score' < `r(p25)' & `cell'
						
						// can't be underconfident if you scored really high and said 7
						if `j'==7 replace `subj'_`pref'undrcnf_`type' = 0 if `score' > `r(p75)' & `cell'
												
					}
				}
			}
		
		* a more conservative version of whiskers
		* overconfidence requires a rating of at least 4, underconfidence at most 4
		local type whisk1  
			gen `subj'_`pref'overcnf_`type' = `subj'_`pref'overcnf_whisk
			gen `subj'_`pref'undrcnf_`type' = `subj'_`pref'undrcnf_whisk
			replace `subj'_`pref'overcnf_`type' = 0 if `subj'_`pref'overcnf_whisk==1 & `var'<4
			replace `subj'_`pref'undrcnf_`type' = 0 if `subj'_`pref'undrcnf_whisk==1 & `var'>4
		
	}
	
	* robustness measure: average categorization of over/underconfidence in the multiple waves. 
	
	foreach type in orig mstrict1 mstrict2 lstrict1 lstrict2 {
			
		bys uid: egen `subj'_overcnf_`type'_ind = mean(`subj'_overcnf_`type') if !mi(`subj'_overcnf_`type') 
		bys uid: egen `subj'_undrcnf_`type'_ind = mean(`subj'_undrcnf_`type') if !mi(`subj'_undrcnf_`type') 
	}
	

}  


* copy every "_ave_" variable onto all rows of the person (they were only
* filled on rows that had both a confidence rating and a score)
foreach var of varlist *_ave_* {
	bys uid: ereplace `var' = max(`var')
}

* some prep for later
* reverse the 1-7 "how hard" rating into "how easy" (7 -> 1, ..., 1 -> 7);
* any other value stays missing
foreach subj in math read {
	gen chld_`subj'cnf_howeasy = 8 - chld_`subj'cnf_howhard if inlist(chld_`subj'cnf_howhard, 1, 2, 3, 4, 5, 6, 7)
}

* prep to find first and second observed confidence and ability, corresponding age (main independent variables) + some controls

* over- and under-confidence must be missing on exactly the same rows
assert mi(math_overcnf_orig) == mi(math_undrcnf_orig)
gen hasconf = !mi(math_overcnf_orig)

* first year with a confidence measure, copied to all rows of the person
tempvar yr_first yr_second
bys uid: egen `yr_first' = min(year) if hasconf==1
bys uid: egen fst_year_has_conf = max(`yr_first')
drop `yr_first'

* second year with a confidence measure: earliest such year other than the first
bys uid: egen `yr_second' = min(year) if hasconf==1 & year!=fst_year_has_conf
bys uid: egen sec_year_has_conf = max(`yr_second')
drop `yr_second'


* percentile scores divided by 10
foreach score in chld_math_ap_pctile chld_read_total_pctile {
	gen `score'_dv = `score'/10
}


********************************************************************************
// CLEANING UP RHS VARIABLES THAT ARE CONSTANT RATHER THAN PANELS
* This section only renames variables; no values are changed.

**First renaming to shorten names
rename (pcgc_evergifted pcgc_eversped pcgc_everrepeatedgrade) ///
	(evergifted eversped everrepeatedgrade)
rename pcgc_frpl frpl
rename dem_sblnum numsibsinhh
* shortened first so that the chld_ prefix can be stripped in the loop below
rename chld_gencnf_dothingsaswellasoth chld_gencnf_dothingwellasoth
* strip the chld_ prefix from the child-reported confidence and related items
foreach var in mathcnf_rateownskill mathcnf_skillreltopeers ///
	mathcnf_skillrelothersubj mathcnf_expectedperf mathcnf_goodatlearnnew mathcnf_howeasy ///
	mathcnf_howuseful mathcnf_imptobegood mathcnf_interesting mathcnf_enjoy ///
	readcnf_rateownskill readcnf_skillreltopeers ///
	readcnf_skillrelothersubj readcnf_expectedperf readcnf_goodatlearnnew readcnf_howeasy ///
	readcnf_howuseful readcnf_imptobegood readcnf_interesting readcnf_enjoy ///
	hasacne hassevereacne gencnf_doimportantthings gencnf_likebeingmyself ///
	gencnf_lottobeproudof gencnf_dothingwellasoth gencnf_goodthingsaboutme ///
	gencnf_goodasothers gencnf_dothingswell {
    rename chld_`var' `var'
	}
* shorten the education-expectation names (prefixes chld_, pcgc_, emtch_ are kept)
rename (chld_expects4yrcollegedeg chld_expectsmorethancollege  pcgc_expects4yrcollegedeg ///
	pcgc_expectsmorethancollege  emtch_expects4yrcollegedeg emtch_expectsmorethancollege) ///
	(chld_exp4yrcolldeg chld_expmorethancoll  pcgc_exp4yrcolldeg ///
	pcgc_expmorethancoll  emtch_exp4yrcolldeg emtch_expmorethancoll)
	
rename (chld_expectsatleastsomecollege pcgc_expectsatleastsomecollege emtch_expectsatleastsomecollege) ///
(chld_expatleastsomecoll pcgc_expatleastsomecoll emtch_expatleastsomecoll) 
rename dem_chfemale female
rename (pcgc_everspanked pcgc_brstfd pcgc_fatherinhh) (everspanked brstfd fatherinhh)
* parenting-practice items get the short pp_ prefix
rename (pcgc_readtochildmorethan1perwk pcgc_artsandcraftsmorethan1perwk pcgc_playedsportsmorethan1perwk ///
	pcgc_homeworkmorethan1perwk pcgc_boardgamesmorethan1perwk pcgc_physaffectmorethan1perwk ///
	pcgc_saidlovemorethan1perwk) (pp_read pp_art pp_sports pp_homework pp_games pp_physaffect pp_saidily)
rename pcgc_appreciateeveryday pp_appreciateeveryday
* "most important quality" items
rename (pcgh_mostimpqual_obey pcgh_mostimpqual_popular pcgh_mostimpqual_thinkforself pcgh_mostimpqual_workhard ///
	pcgh_mostimpqual_helpothers) (mostimp_obey mostimp_popular mostimp_thinkforself mostimp_workhard ///
	mostimp_helpothers)
* school-level variables from the emadm_ source
rename (emadm_schpctwhitestudents emadm_schpctblackstudents emadm_schpcthispanicstudents) ///
	(schpctwhite schpctblack schpcthispanic)
rename (emadm_schpctfrpl emadm_perpupilexpenditure emadm_schstutchratio) ///
	(schpctfrpl schppe schstutchratio)
rename emtch_yearsatschool tch_yearsatschool

********************************************************************************
// INDIVIDUAL CONTROL VARIABLES
	
***Want this material for weight calculation
* For each demographic variable build, at the person level:
*   c_<var>  the 1997 value, else the 2003 value, else the 2007 value
*   m_<var>  1 if c_<var> is missing (kept only if anyone is missing)
*   r_<var>  c_<var> with missing recoded to 0 (only alongside m_<var>)
foreach var in female chld_white chld_black chld_hisp  {

	* person-level value observed in each CDS year
	foreach wave in 1997 2003 2007 {
		local sfx = substr("`wave'", 3, 2)
		tempvar thiswave
		gen `thiswave' = `var' if year==`wave'
		bys uid: egen `var'_`sfx' = max(`thiswave')
		drop `thiswave'
	}

	* earliest non-missing wave wins
	gen c_`var' = cond(!mi(`var'_97), `var'_97, cond(!mi(`var'_03), `var'_03, `var'_07))

	gen m_`var' = mi(c_`var')
	sum m_`var'
	if `r(mean)'==0 & `r(sd)'==0 {
		* nobody is missing: the indicator carries no information
		drop m_`var'
	}
	else {
		gen r_`var' = c_`var'
		replace r_`var' = 0 if mi(c_`var')
	}

	* remove the per-wave helper columns
	drop `var'_*
}
 
 
* HH or Individual variables
* Shortens names, then builds person-level baseline controls (c_/m_/r_ triplets)
* from the years around the first confidence measurement.

* taxable income of head and wife only; the other-family-member component is commented out
gen familytaxableincome2016d = hh_hdwftaxableinc_pryr2016d // +hh_ofumtaxableinc_pryr2016d 
rename hh_fipscurrentstate state
rename hh_currentregion region
rename (pi_birthquarter pi_birthyear) (birthquarter birthyear)
rename (hh_rectransfers_pryr hh_recssi_pryr) (recgovtransfers recssi)
rename hh_mainmealtogethgeq6days mainmealtogethgeq6days
rename hh_publichousing publichousing
rename hh_recfoodstamps_pryr recfoodstamps
rename hh_totalfamilyinc_pryr2016d familytotalincome2016d

rename pi_birthorder birthorder   
	
* two-digit year as a string variable (separate from the local macro `yr' used in the loops)
gen yr = substr(string(year, "%4.0f"), 3, 2)	
* For each variable:
*   <var>_YY  person-level value in year YY, taken only from years within
*             [first confidence year, first confidence year + 3]; people with no
*             confidence year contribute their 1997 value
*   c_<var>   first non-missing of <var>_97, _99, ..., _09
*   m_<var>   1 if c_<var> is missing (dropped when nobody is missing)
*   r_<var>   c_<var> with missing recoded to 0 (only alongside m_<var>)
foreach var of varlist state birthquarter birthyear recgovtransfers recssi ///
	mainmealtogethgeq6days publichousing recfoodstamps familytotalincome2016d familytaxableincome2016d ///
	atleast_grad_*dad atleast_grad_*mom hh_hd_atleast_grad_hs_ged hh_hd_atleast_grad_hs ///
	hh_hd_atleast_grad_bach hh_hd_atleast_grad_postbach birthorder twoadulthh occ_*_mom occ_*_dad region {
    forval year = 1997(2)2009 {
             local yr = substr("`year'", 3, 2)
             gen temp = `var' if year==`year' & year>=fst_year_has_conf & year<=fst_year_has_conf+3
	     replace temp = `var' if year==1997 & mi(temp) & mi(fst_year_has_conf)
	     bys uid: egen `var'_`yr' = max(temp)
	     drop temp
        }    
    gen c_`var' = `var'_97
    forval year = 1997(2)2009 {
        local yr = substr("`year'", 3, 2)
        replace c_`var' = `var'_`yr' if mi(c_`var')
    }    
    gen m_`var' = mi(c_`var')
    sum m_`var'
    if `r(mean)'==0 & `r(sd)'==0 drop m_`var'
    else {
       gen r_`var' = c_`var'
       replace r_`var' = 0 if mi(c_`var')
    }
    * removes the per-year helper columns.
    * NOTE(review): the wildcard matches every variable starting with `var'_ ; when
    * `var' is hh_hd_atleast_grad_hs it also matches hh_hd_atleast_grad_hs_ged, so
    * that source variable is dropped here too - confirm this is intended.
    drop `var'_*
}
* negative incomes are floored at zero before squaring
replace c_familytotalincome2016d = 0 if c_familytotalincome2016d < 0
replace c_familytaxableincome2016d = 0 if c_familytaxableincome2016d < 0
gen c_familytaxableincome2016d_sq = c_familytaxableincome2016d^2
 
* variables that come from a particular year/are already fixed

* 1997 family id, copied to every row of the person
bys uid: egen famid1997 = max(pi_famid1997)
drop pi_famid1997

* replace each variable by its 1997 value, copied to every row of the person
rename dem_childofheads childofheads
foreach var in childofheads agewhenborn_dad agewhenborn_mom {
	tempvar in1997
	gen `in1997' = `var' if year==1997
	drop `var'
	bys uid: egen `var' = max(`in1997')
	drop `in1997'
}

* c_ / m_ / r_ versions: value, missing indicator (kept only if anyone is
* missing), and value with missing recoded to 0
foreach var in childofheads agewhenborn_dad agewhenborn_mom {
	rename `var' c_`var'
	gen m_`var' = mi(c_`var')
	sum m_`var'
	if `r(mean)'==0 & `r(sd)'==0 {
		drop m_`var'
	}
	else {
		gen r_`var' = c_`var'
		replace r_`var' = 0 if mi(c_`var')
	}
}

* TAS "ever" outcomes: person-level maximum replaces the ta_ original
foreach var in everarrested everjailed everhadpsychdiag {
	bys uid: egen `var' = max(ta_`var')
	drop ta_`var'
}

* major category must not vary within person; then spread it to all rows
tempvar spread major_all
bys uid: egen `spread' = sd(ta_major_cat)
assert inlist(`spread', 0, .)
drop `spread'
bys uid: egen `major_all' = max(ta_major_cat)
replace ta_major_cat = `major_all'
drop `major_all'
rename ta_major_cat cat_major
gen stemhlthmajor = inlist(cat_major, 2, 3, 4, 5) if !mi(cat_major)
gen stemmajor = inlist(cat_major, 2, 4, 5) if !mi(cat_major)

* age at first child: person-level maximum on all rows
tempvar afc_all
bys uid: egen `afc_all' = max(ta_agefirstchild)
replace ta_agefirstchild = `afc_all'
drop `afc_all'
rename ta_agefirstchild agefirstchild

* education expectations of child / PCG / teacher: person-level maximum, renamed max_*
foreach person in chld pcgc emtch {
	foreach var in expatleastsomecoll exp4yrcolldeg expmorethancoll {
		tempvar top
		bys uid: egen `top' = max(`person'_`var')
		replace `person'_`var' = `top'
		drop `top'
		rename `person'_`var' max_`person'_`var'
	}
}

gen round_age_meas_conf = round(age_meas_conf)

********************************************************************************
// CREATE FIRST AND SECOND MEASUREMENTS OF VARS THAT DON'T REQUIRE STANDARDIZATION
* For each listed variable this builds person-level values taken from the year of
* the first (fst) and, for the early part of the list, second (sec) confidence
* measurement, plus a missing indicator m_* and a zero-filled copy r_*.
*
* The loop carries state across iterations, so the ORDER of the varlist matters:
*   - from evergifted onward only the first measurement is built (prefixlist = "fst")
*   - from pp_read onward the fst_ part is removed from the final names
*     (c_<var>, r_<var>, m_<var> instead of c_fst_<var>, ...)
* Variables whose name contains "_ave_" are skipped.

* Now separating the first and second measurements of confidence and raw ability
local prefixlist "fst sec"
local rename_fst = 0
foreach var of varlist math_*rcnf* read_*rcnf* age_meas_conf_bucket age_meas_conf ///
	chld_digitspantotal_raw chld_math_ap_pctile_dv chld_read_total_pctile_dv ///
	chld_math_ap_pctile chld_read_total_pctile *pctilea* *pctileb* ///
	mathcnf_rateownskill  readcnf_rateownskill  ///
	evergifted eversped everrepeatedgrade frpl hasacne hassevereacne  ///
	parent_pred_final_educ tch_pred_final_educ ///
	mathcnf_skillreltopeers ///
	mathcnf_skillrelothersubj mathcnf_expectedperf mathcnf_goodatlearnnew mathcnf_howeasy ///
	mathcnf_howuseful mathcnf_imptobegood mathcnf_interesting mathcnf_enjoy ///
	readcnf_skillreltopeers ///
	readcnf_skillrelothersubj readcnf_expectedperf readcnf_goodatlearnnew readcnf_howeasy ///
	readcnf_howuseful readcnf_imptobegood readcnf_interesting readcnf_enjoy ///
	chld_feelpartofschool chld_feelclosetopeers ///
	emtch_read_ability emtch_math_ability emtch_physical_ability ///
	pcgc_childnumfriends pcgc_expectededuc chld_expectededuc numsibsinhh ///
	pp_read pp_art pp_sports pp_homework pp_games pp_physaffect ///
	pp_saidily everspanked brstfd fatherinhh pp_appreciateeveryday mostimp_obey ///
	mostimp_thinkforself mostimp_workhard mostimp_helpothers schpctwhite schpctblack ///
	schpcthispanic schpctfrpl schppe schstutchratio tch_yearsatschool {
    
	if strpos("`var'", "_ave_")==0 {
    * both switches are one-way: once set they stay set for all later variables
    if "`var'"=="evergifted" local prefixlist "fst"
    if "`var'"=="pp_read" local rename_fst = 1

    foreach prefix in `prefixlist' {
        * value in the year of the first/second confidence measurement
        gen temp = `var' if year==`prefix'_year_has_conf
		* people without any confidence year fall back to their 1997 value (first measurement only)
		if "`prefix'"=="fst" replace temp = `var' if year==1997 & mi(fst_year_has_conf)
        bys uid: egen `prefix'_`var' = max(temp)
        drop temp
		gen m_`prefix'_`var' = mi(`prefix'_`var')
		sum m_`prefix'_`var'

        * nobody missing: drop the indicator; only the c_ variable would need renaming
        if `r(mean)'==0 & `r(sd)'==0 {
	    drop m_`prefix'_`var'
	    local rename "rename (c_`prefix'_`var') (c_`var')"
		}
        * otherwise keep m_ and add the zero-filled r_ copy
        else {
            gen r_`prefix'_`var' = `prefix'_`var'
            replace r_`prefix'_`var' = 0 if mi(`prefix'_`var')
			local rename "rename (c_`prefix'_`var' r_`prefix'_`var' m_`prefix'_`var') (c_`var' r_`var' m_`var')"
        }
	* first measurements get the c_ prefix; second measurements keep the name sec_<var>
	if "`prefix'"=="fst" rename `prefix'_`var' c_`prefix'_`var'
	* run the rename command prepared above (only from pp_read onward)
	if `rename_fst'==1 `rename'
    }     
	}
} 
 
 
********************************************************************************
// IDENTIFYING THE CONFIDENCE SAMPLE

* find samples with confidence measures
* Over- and under-confidence must be observed together: within each subject
* the two measures are either both missing or both non-missing.
foreach subj in math read {
	assert mi(c_fst_`subj'_overcnf_orig) == mi(c_fst_`subj'_undrcnf_orig)
}

* In the sample = has the subject's over-confidence measure AND the age at
* which confidence was measured (mi() with several arguments is 1 if any is missing).
generate confidence_sample = !mi(c_fst_math_overcnf_orig, c_fst_age_meas_conf)
generate confidence_sample_read = !mi(c_fst_read_overcnf_orig, c_fst_age_meas_conf)
* there are kids with math without reading but not vice-versa
  
* SAMPLE: everyone who has math test score and a math and reading self-assessed ability measure and age observed confidence

********************************************************************************
// CREATING WEIGHTS USING THE CONFIDENCE SAMPLE

* Baseline weight: copy ch97prwt under the weight_confsamp# naming scheme
generate weight_confsamp1 = ch97prwt

* Indicators for the SRC and SEO samples, defined from ranges of ER30001
* (a missing ER30001 gives 0 in both)
generate hh_from_src = !(ER30001 > 3000)
generate hh_from_seo = inrange(ER30001, 5000, 7000)

* Household-head age bands (a missing age gives 0 in both)
generate hh_hd_under30 = !(hh_hd_age > 30)
generate hh_hd_30_45 = hh_hd_age > 30 & !(hh_hd_age > 45)

* Urbanicity bins built from hh_rural_urban:
*   1 = in or around big central cities (codes 1-2)
*   2 = smaller metropolitan areas      (codes 3-4)
*   3 = outside metropolitan areas      (codes 5-9)
*   0 = hh_rural_urban is missing
* Any other code is left missing.
generate hh_urban_cats = cond(inlist(hh_rural_urban, 1,2), 1, ///
	cond(inlist(hh_rural_urban, 3,4), 2, ///
	cond(inlist(hh_rural_urban, 5,6,7,8,9), 3, ///
	cond(mi(hh_rural_urban), 0, .))))
generate m_hh_urban_cats = mi(hh_rural_urban)

* Income bins (1997 quintiles); cutoffs taken from
* https://www2.census.gov/library/publications/1998/demographics/p60-200.pdf
* Missing income stays missing.
generate hh_taxincome_qs = cond(c_familytaxableincome2016d <= 23251, 1, ///
	cond(c_familytaxableincome2016d <= 43925, 2, ///
	cond(c_familytaxableincome2016d <= 68991, 3, ///
	cond(c_familytaxableincome2016d <= 107216, 4, ///
	cond(!mi(c_familytaxableincome2016d), 5, .)))))

************
****First creating weights from logit regression
*local path "/Users/LucyPage/Dropbox (MIT)/Research_projects/LP_PSID_confidence_paper/Second year paper (PSID)"
* Regressors for the logit that predicts confidence-sample membership.
* Child / sampling characteristics:
local wt_demog hh_from_src hh_from_seo c_female i.r_birthyear ///
	c_chld_white c_chld_black c_chld_hisp ///
	i.c_region i.hh_urban_cats m_hh_urban_cats

* Household-head and income characteristics:
local wt_demog2 hh_hd_male hh_hd_under30 hh_hd_30_45 ///
	i.hh_taxincome_qs c_familytaxableincome2016d ///
	r_hh_hd_atleast_grad_hs_ged m_hh_hd_atleast_grad_hs_ged ///
	r_hh_hd_atleast_grad_bach m_hh_hd_atleast_grad_bach ///
	hh_hd_unemployed_pryr

* Ability proxy (r_ = value with missings set to 0, m_ = missing flag):
local ability r_fst_chld_digitspantotal_raw m_fst_chld_digitspantotal_raw

local weight_controls `wt_demog' `wt_demog2' `ability'

* Coefficients reported in the output table, in row order.
* Every entry is spelled out in full (3.hh_urban_cats, not an abbreviation) so the
* _b[]/_se[] lookups do not depend on variable-name abbreviation being enabled.
local varfortable hh_from_src hh_from_seo c_female c_chld_white c_chld_black c_chld_hisp ///
	1.hh_urban_cats 2.hh_urban_cats 3.hh_urban_cats hh_hd_male hh_hd_under30 hh_hd_30_45 ///
	2.hh_taxincome_qs 3.hh_taxincome_qs 4.hh_taxincome_qs 5.hh_taxincome_qs ///
	c_familytaxableincome2016d r_hh_hd_atleast_grad_hs_ged ///
	r_hh_hd_atleast_grad_bach hh_hd_unemployed_pryr r_fst_chld_digitspantotal_raw m_fst_chld_digitspantotal_raw
	
* Share of 1997 observations that are in the confidence sample (table footer)
summ confidence_sample if year == 1997
local mean_outcome = string(`r(mean)', "%4.3f")

* Logit of confidence-sample membership on the weight controls, 1997 rows only,
* with standard errors clustered on famid1997
logit confidence_sample `weight_controls' if year == 1997, cluster(famid1997)
	local N = `e(N)'
	local covars: colfullnames e(b)

	* Store formatted coefficient (+ stars) and standard error for each table row
	* in locals bstar_<key> and se_<key>
	foreach indepvar in `varfortable' {
		if strpos("`covars'","`indepvar'") != 0 {
			* <key> is a shortened version of the regressor name; the same
			* shortening is applied again when the table is written
			local varname = subinstr("`indepvar'",".","",.)
			local varname = subinstr("`varname'","atleast","",.)
			local varname = subinstr("`varname'","pctile","",.)
			local varname = subinstr("`varname'","spantotal","",.)
			local varname = subinstr("`varname'","familytaxable","",.)
			if _b[`indepvar']!=0  {
				local b_`varname' = string(_b[`indepvar'], "%4.3f")
				local se_`varname' = string(_se[`indepvar'], "%4.3f")
				* Stars are computed from the UNROUNDED estimates. Using the
				* 3-decimal display strings would turn small-scale coefficients
				* (e.g. income in dollars) into 0.000/0.000 and lose their stars.
				local b_full = _b[`indepvar']
				local se_full = _se[`indepvar']
				get_pval, b(`b_full') se(`se_full') normal
				local star_`varname' "`r(star)'"
				local bstar_`varname' "`b_`varname''`star_`varname''"
				local se_`varname' = "(" + "`se_`varname''" + ")"
			}
			else {
				* coefficient of exactly 0: shown as "--" with no standard error
				local bstar_`varname' "--"
				local se_`varname' = ""
			}
		}
	}

	* Predicted probability of being in the confidence sample
	predict pr_conf_samp if year == 1997
	* Hand-fill predictions that are still missing for specific groups
	replace pr_conf_samp = 0 if m_birthyear == 1 & year == 1997 & mi(pr_conf_samp)
	replace pr_conf_samp = 1 if r_birthyear == 1983 & year == 1997 & mi(pr_conf_samp)
	replace pr_conf_samp = 0 if c_region == 6 & year == 1997 & mi(pr_conf_samp)
	assert !mi(pr_conf_samp) if year == 1997
	* Inverse-probability adjustment of the baseline weight
	* (a probability of 0 yields a missing weight here)
	gen weight_confsamp2 = weight_confsamp1 / pr_conf_samp if year == 1997

* Row labels for the regression table: exactly one label per entry of `varfortable',
* in the same order. (There is no missing-race regressor in `varfortable', so there
* is no "Missing race" row; an extra label would shift every later label by one row.)
local indepvarlabels `" "From SRC sample" "From SEO sample" "Female" "White" "Black" "Hispanic" "Lives in or around big city" "In smaller metropolitan area" "Outside metropolitan areas" "HH head male" "HH head under 30" "HH head 30 to 45" "2nd quintile taxable income" "3rd quintile taxable income" "4th quintile taxable income" "5th quintile taxable income" "Total taxable income" "HH head at least grad HS/GED" "HH head at least grad bach" "HH head unemployed in last year" "Digit span" "Missing digit span" "'

* Guard against the label list and the regressor list drifting apart again
local nrows : word count `varfortable'
local nlabels : word count `indepvarlabels'
assert `nrows' == `nlabels'

*Write out table of the regression results
file open ab using "`path'/out/tables/table_confsamp_weights.tex", write replace

file write ab "\begin{tabular}{l c}" _n
file write ab "\hline \hline" _n
file write ab " & \multicolumn{1}{c}{In the confidence sample} \\" _n
file write ab "\hline" _n

	* One coefficient line and one standard-error line per regressor
	forvalues i = 1/`nrows' {
		local indepvar: word `i' of `varfortable'
		* rebuild the macro-name key used when the estimates were stored
		local varname = subinstr("`indepvar'",".","",.)
		local varname = subinstr("`varname'","atleast","",.)
		local varname = subinstr("`varname'","pctile","",.)
		local varname = subinstr("`varname'","spantotal","",.)
		local varname = subinstr("`varname'","familytaxable","",.)
		local label: word `i' of `indepvarlabels'
		file write ab "`label' & `bstar_`varname''  \\" _n
		file write ab " & `se_`varname'' \\" _n
	}

file write ab "\hline " _n
file write ab "N  & `N'  \\" _n
file write ab "Outcome mean  & `mean_outcome' \\" _n
file write ab "\hline \hline " _n
	file write ab "\end{tabular}" _n
	file close ab
 
*Censoring extreme weight values at the 1st and 99th percentiles (1997 rows)
*Replace missing weights with a very high value to include in the percentile calcs
replace weight_confsamp2 = 1000 if year == 1997 & mi(weight_confsamp2)
	_pctile weight_confsamp2 if year == 1997, p(1, 99)
	* keep both cutoffs in locals so they cannot be lost between commands
	local wt_p01 = r(r1)
	local wt_p99 = r(r2)
	* bottom-code at the 1st percentile
	replace weight_confsamp2 = `wt_p01' if weight_confsamp2 < `wt_p01' & year == 1997
	* top-code at the 99th percentile; !mi() is needed because missing compares
	* as larger than any number
	replace weight_confsamp2 = `wt_p99' if weight_confsamp2 > `wt_p99' & !mi(weight_confsamp2) & year == 1997

******************
****Now creating weights by raking, starting from the 1997 weights
*Rake based on family income, test scores, and race
*Only create the weights if we have no missing data for those variables
* _one is 1 for 1997 confidence-sample rows and missing elsewhere; it is used below
* as the column equation name of every control-total matrix.
* NOTE(review): the colnames / coleq / rownames layout presumably follows the
* ctotal() convention of ipfraking -- confirm against its help file.
gen _one = 1 if confidence_sample == 1 & year == 1997

*Race
* Categories: 1 = white, 2 = black, 3 = hispanic, 4 = none of the three.
* Later replaces win, so hispanic overrides black/white when several flags are 1.
gen race = 1 if c_chld_white == 1 & year == 1997
	replace race = 2 if c_chld_black == 1 & year == 1997
	replace race = 3 if c_chld_hisp == 1 & year == 1997
	replace race = 4 if c_chld_white == 0 & c_chld_black == 0 & c_chld_hisp == 0
	replace race = . if confidence_sample == 0 
* Target shares for race categories 1-4 (they sum to 1)
matrix race1997 = (0.691, 0.121, 0.125, 0.063)
	matrix colnames race1997 = 1 2 3 4
	matrix coleq race1997 = _one
	matrix rownames race1997 = race
	
*Household income quintiles
* Note: this blanks hh_taxincome_qs outside the confidence sample from here on
replace hh_taxincome_qs = . if confidence_sample == 0 
* Target shares: 20% in each income bin
matrix income1997 = (0.2, 0.2, 0.2, 0.2, 0.2)
	matrix colnames income1997 = 1 2 3 4 5
	matrix coleq income1997 = _one
	matrix rownames income1997 = hh_taxincome_qs
	
*Math score quintiles
* Despite its name, math_quintile holds DECILES (1-10) of c_fst_chld_math_ap_pctile:
* bin i covers [10*(i-1), 10*i), and a percentile of exactly 100 goes to bin 10.
gen math_quintile = .
	forvalues i = 1/10 {
		replace math_quintile = `i' if c_fst_chld_math_ap_pctile >= (`i'-1) * 10 ///
			& c_fst_chld_math_ap_pctile < `i'*10
	}
	replace math_quintile = 10 if c_fst_chld_math_ap_pctile == 100
	replace math_quintile = . if confidence_sample == 0 	
* Target shares: 10% in each decile
matrix mathdeciles1997 = (0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
	matrix colnames mathdeciles1997 = 1 2 3 4 5 6 7 8 9 10
	matrix coleq mathdeciles1997 = _one
	matrix rownames mathdeciles1997 = math_quintile 
	
* Print the three control-total matrices to the log for inspection
matrix list race1997 
matrix list income1997
matrix list mathdeciles1997

* Rake the baseline weight (weight_confsamp1) to the three sets of margins for the
* 1997 confidence sample; the result is stored in weight_confsamp3.
* NOTE(review): ipfraking is not defined in this file (presumably a user-written
* package that must be installed); trimhiabs(0.002) presumably caps the raked
* weights from above -- confirm both against the ipfraking documentation.
ipfraking [pw=weight_confsamp1] if confidence_sample == 1 & year == 1997, generate(weight_confsamp3) ///
	ctotal(race1997 income1997 mathdeciles1997) trimhiabs(0.002) nograph

* Copy each constructed weight to every row of the same child (uid): take the
* within-uid minimum, then let it replace the original variable under the same name.
foreach k of numlist 2 3 {
	local src  weight_confsamp`k'
	local full weight_confsamp_full`k'
	bysort uid: egen `full' = min(`src')
	drop `src'
	rename `full' `src'
}


* Unweighted case: a constant weight of 1
generate weight_confsamp0 = 1

* Family taxable income rescaled to thousands
generate c_thou_familytaxableincome2016d = c_familytaxableincome2016d / 1000
label variable c_thou_familytaxableincome2016d "Total family taxable income (thous 2016 USD)"

*Region dummies
*Values from here: https://www.census.gov/prod/cen2010/briefs/c2010br-01.pdf
*0.190 for Northeast, 0.229 for Midwest, 0.356 for South, 0.225 for West
tabulate region, generate(reg_dum)
local regnames Northeast Midwest South West
forvalues r = 1/4 {
	local rname : word `r' of `regnames'
	label variable reg_dum`r' "\hspace{20pt} `rname'"
}

* Labels for child characteristics and test-score percentiles
label variable c_female "Child is female"
label variable c_chld_white "Child is white"
label variable c_chld_black "Child is black"
label variable c_chld_hisp "Child is hispanic"
label variable c_fst_chld_math_ap_pctile "Math score percentile"
label variable c_fst_chld_read_total_pctile "Reading score percentile"


********************************************************************************
// MAKING VARIABLES THAT REQUIRE STANDARDIZATION WITH WEIGHTS

* The four weighting schemes; word k+1 of this list is the weight behind suffix _w<k>
local weightvars weight_confsamp0 weight_confsamp1 weight_confsamp2 weight_confsamp3 

*****Check this section. Only in sample with confidence_sample == 1 ???

* Flags for the samples that drop the tails of the math-percentile distribution.
* The weighted percentiles are computed on all 1997 rows.
foreach weight in `weightvars' {
	* suffix = "_w" + last character of the weight name (0, 1, 2 or 3)
	local w = "_w" + substr("`weight'", -1, 1)
	_pctile c_fst_chld_math_ap_pctile if year==1997 [aweight = `weight'], n(100)
	* keep scores between the 11th and 89th percentiles
	generate confidence_sample_drop10p`w' = ///
		inrange(c_fst_chld_math_ap_pctile, `r(r11)', `r(r89)')
	* keep scores between the 16th and 84th percentiles
	generate confidence_sample_drop15p`w' = ///
		inrange(c_fst_chld_math_ap_pctile, `r(r16)', `r(r84)')
}

*************
// CHILD GENERAL, MATH, AND READING _LEVELS OF_ CONFIDENCE
* standardized raw levels of confidence standardized by age bucket and year, also averaged across measures of to make indices 
* note: don't want to use cnf_selfconcept measures which averages including vars we don't want to use e.g. for gencnf it includes "others think i am a good person" which we don't want
	 	
rename (mathcnf_rateownskill readcnf_rateownskill) (mathcnf_ratesk readcnf_ratesk)

* Assemble the item list: general-confidence items first, then the same ten
* subject-specific items for math and for reading.
local gen_items doimportantthings likebeingmyself lottobeproudof dothingwellasoth ///
	goodthingsaboutme goodasothers dothingswell
local subj_items ratesk skillreltopeers skillrelothersubj expectedperf goodatlearnnew ///
	howeasy howuseful imptobegood interesting enjoy

local cnf_items
foreach s of local gen_items {
	local cnf_items `cnf_items' gencnf_`s'
}
foreach subj in math read {
	foreach s of local subj_items {
		local cnf_items `cnf_items' `subj'cnf_`s'
	}
}

* z-score every item within year x age bucket, using the weighted mean and sd of the
* confidence sample; one standardized variable per weighting scheme (_w0 ... _w3).
foreach item of local cnf_items {
	forvalues w = 0/3 {
		gen std_`item'_w`w' = .
	}

	forvalues bucket = 1/4 {
		foreach yr in 1997 2003 2007 {
			forvalues w = 0/3 {
				local wvar : word `=`w'+1' of `weightvars'
				sum `item' [aweight = `wvar'] if year==`yr' & age_meas_conf_bucket==`bucket' & confidence_sample == 1
				* cells with no observations are left missing
				if `r(N)'>0 {
					replace std_`item'_w`w' = (`item' - `r(mean)')/`r(sd)' if year==`yr' & age_meas_conf_bucket==`bucket'
				}
			}
		}
	}
}

* General-confidence index: simple average of five standardized items
* (higher = more confident for every component).
* gencnf_doimportantthings and gencnf_likebeingmyself are left out because they
* are only available in 1997.
forvalues w = 0/3 {
	local sfx _w`w'
	gen std_gencnf_overall`sfx' = (std_gencnf_lottobeproudof`sfx' ///
		+ std_gencnf_dothingwellasoth`sfx' ///
		+ std_gencnf_goodthingsaboutme`sfx' ///
		+ std_gencnf_goodasothers`sfx' ///
		+ std_gencnf_dothingswell`sfx')/5
}
 			
*************
// TEACHER PERCEPTIONS OF CONFIDENCE
* standardized by age group and year

rename tch_academiccompetence tch_acadcomp
rename tch_socialcompetence tch_soccomp
rename tch_physicalcompetence tch_physcomp

foreach var in tch_acadcomp tch_soccomp tch_physcomp ///
		emtch_read_ability emtch_math_ability emtch_physical_ability {
	* Reverse-code so that a higher value means higher ability.
	if strpos("`var'", "comp")>0 {
		* competence items: values 1..4 become 4..1, anything else becomes missing
		gen temp = 5 - `var' if inlist(`var', 1, 2, 3, 4)
		drop `var'
		rename temp `var'
		local stub = subinstr("`var'", "tch", "", .)
	}
	if strpos("`var'", "ability")>0 {
		* ability items: values 1..3 become 3..1, anything else becomes missing
		gen temp = 4 - `var' if inlist(`var', 1, 2, 3)
		drop `var'
		rename temp `var'
		local stub = subinstr("`var'", "emtch", "", .)
	}

	* z-score within year x teacher-measure age bucket, using the weighted
	* confidence-sample mean and sd, once per weighting scheme
	forvalues w = 0/3 {
		gen tch_std`stub'_w`w' = .
	}
	forvalues bucket = 1/4 {
		foreach yr in 1997 2003 2007 {
			forvalues w = 0/3 {
				local wvar : word `=`w'+1' of `weightvars'
				sum `var' [aweight = `wvar'] if year==`yr' & age_tch_meas_ability_bucket==`bucket' & confidence_sample == 1
				if `r(N)'>0 {
					replace tch_std`stub'_w`w' = (`var' - `r(mean)')/`r(sd)' if year==`yr' & age_tch_meas_ability_bucket==`bucket'
				}
			}
		}
	}
}
* remove the underscore in front of "ability" in the standardized names
rename (tch_*_ability*) (tch_*ability*)

*************
* BIG 5 TRAITS
* standardized by age bucket and year

rename pcgc_difficultyconcentrating pcgc_diffconcentrating

* z-score each caregiver-reported behaviour item within year x age bucket, using
* the weighted confidence-sample mean and sd; output is pcgc_std_<item>_w0 ... _w3.
foreach item of varlist pcgc_suddenmoodswings-pcgc_worriestoomuch pcgc_cheerful-pcgc_selfreliant {
	* the stub keeps the leading underscore, e.g. "_cheerful"
	local stub = subinstr("`item'", "pcgc", "", .)

	forvalues w = 0/3 {
		gen pcgc_std`stub'_w`w' = .
	}
	forvalues bucket = 1/4 {
		foreach yr in 1997 2003 2007 {
			forvalues w = 0/3 {
				local wvar : word `=`w'+1' of `weightvars'
				sum `item' [aweight = `wvar'] if year==`yr' & age_meas_conf_bucket==`bucket' & confidence_sample == 1
				if `r(N)'>0 {
					replace pcgc_std`stub'_w`w' = (`item' - `r(mean)')/`r(sd)' if year==`yr' & age_meas_conf_bucket==`bucket'
				}
			}
		}
	}
}
 
* Sign-flip the standardized items listed here before they enter the indices
forvalues w = 0/3 {
	foreach item in cheats arguestoomuch diffconcentrating ///
		meantoothers disobedient troublegettingalong stubborn ///
		withdrawn destructive clingstoadults getsoverbeingupset {
		replace pcgc_std_`item'_w`w' = -pcgc_std_`item'_w`w'
	}
}

* Child Big-5 indices: unweighted averages of the standardized items
forvalues w = 0/3 {
	local s _w`w'

	gen chld_big5_conscientious`s' = (pcgc_std_cheats`s' + pcgc_std_diffconcentrating`s' ///
		+ pcgc_std_waitsturn`s' + pcgc_std_doescarefulwork`s' + pcgc_std_obedient`s')/5

	gen chld_big5_extroversion`s' = (pcgc_std_withdrawn`s' + pcgc_std_demandsattention`s')/2

	gen chld_big5_neuroticism`s' = (pcgc_std_suddenmoodswings`s' + pcgc_std_highstrung`s' ///
		+ pcgc_std_fearful`s' + pcgc_std_hasobsessions`s' + pcgc_std_paranoid`s' ///
		+ pcgc_std_worriestoomuch`s' + pcgc_std_getsoverbeingupset`s')/7

	gen chld_big5_agreeableness`s' = (pcgc_std_arguestoomuch`s' + pcgc_std_meantoothers`s' ///
		+ pcgc_std_disobedient`s' + pcgc_std_troublegettingalong`s' + pcgc_std_stubborn`s' ///
		+ pcgc_std_destructive`s' + pcgc_std_cheerful`s' + pcgc_std_getsalong`s' ///
		+ pcgc_std_wellliked`s')/9

	gen chld_big5_openness`s' = (pcgc_std_impulsive`s' + pcgc_std_clingstoadults`s' ///
		+ pcgc_std_hangsaroundtrouble`s' + pcgc_std_curious`s')/4
}


*************
// PARENT GENDER NORMS
* standardized by year

* shorten two names so the standardized versions stay within the name-length limit
rename pcgh_preschchildsuffersifmomempl pcgh_preschchildsuffifmomempl
rename pcgh_independindaughtersandsons pcgh_indepdaughterssons

* gender norm index components: z-score each item within year, using the weighted
* confidence-sample mean and sd; output is std_<item>_w0 ... _w3
foreach item in pcgh_decisionsmadebymanofhh pcgh_womenhappierathome pcgh_workiseithermensorwomens ///
	pcgh_betterifmanearnsliving pcgh_betterwifehelphusbcareer pcgh_preschchildsuffifmomempl ///
	pcgh_husbwifesharetasks pcgh_employmomequalunemploymom pcgh_indepdaughterssons ///
	pcgh_dadasinvolvedasmom {
	local stub = subinstr("`item'", "pcgh_", "", 1)
	forvalues w = 0/3 {
		gen std_`stub'_w`w' = .
	}
	foreach yr in 1997 2003 2007 {
		forvalues w = 0/3 {
			local wvar : word `=`w'+1' of `weightvars'
			sum `item' [aweight = `wvar'] if year==`yr' & confidence_sample == 1
			* years with no observations stay missing
			if `r(N)'>0 {
				replace std_`stub'_w`w' = (`item' - `r(mean)')/`r(sd)' if year==`yr'
			}
		}
	}
}
/* 	*agree = have strong gender norms: 
	pcgh_decisionsmadebymanofhh pcgh_womenhappierathome pcgh_workiseithermensorwomens ///	
	pcgh_betterifmanearnsliving pcgh_betterwifehelphusbcareer pcgh_preschchildsuffersifmomempl
	
	* disagree = have strong gender norms:
	pcgh_husbwifesharetasks pcgh_employmomequalunemploymom pcgh_independindaughtersandsons ///
	pcgh_dadasinvolvedasmom	
*/
// flip the agrees=strong gender norms so that higher number == more gender norms
// Each weighted version (_w0 ... _w3) is negated in place. Looping over the suffix
// ensures every version is flipped exactly once and none is overwritten by another.
foreach var in decisionsmadebymanofhh womenhappierathome workiseithermensorwomens ///
	betterifmanearnsliving betterwifehelphusbcareer preschchildsuffifmomempl {
	forvalues w = 0/3 {
		replace std_`var'_w`w' = -1*std_`var'_w`w'
	}
}
* Parent gender-norms index: average of eight standardized items.
* std_decisionsmadebymanofhh and std_workiseithermensorwomens are left out
* because they were only asked in 1997.
forvalues w = 0/3 {
	local s _w`w'
	gen std_pcggendernorms`s' = (std_womenhappierathome`s' + std_betterifmanearnsliving`s' ///
		+ std_betterwifehelphusbcareer`s' + std_preschchildsuffifmomempl`s' ///
		+ std_husbwifesharetasks`s' + std_employmomequalunemploymom`s' ///
		+ std_indepdaughterssons`s' + std_dadasinvolvedasmom`s')/8
}

* Drop every variable (raw or standardized) whose name contains one of the item names
drop *decisionsmadebymanofhh* *womenhappierathome* *workiseithermensorwomens* ///
	*betterifmanearnsliving* *betterwifehelphusbcareer* *preschchildsuffifmomempl* ///
	*husbwifesharetasks* *employmomequalunemploymom* *indepdaughterssons* ///
	*dadasinvolvedasmom*
 

*************
* PARENT MENTAL HEALTH MEASURES
* standardized by year

rename pcgc_aggravationinparentingscore pcgc_parentingaggscore

* z-score the three caregiver scale scores within year, using the weighted
* confidence-sample mean and sd; output is std_pcg<name>_w0 ... _w3
foreach score in pcgc_ownselfesteemscore pcgc_ownselfefficscore pcgc_parentingaggscore {
	* strip the first "pcgc_", then the first "own", then the first "score",
	* e.g. pcgc_ownselfesteemscore -> selfesteem
	local stub = subinstr(subinstr(subinstr("`score'", "pcgc_", "", 1), "own", "", 1), "score", "", 1)
	forvalues w = 0/3 {
		gen std_pcg`stub'_w`w' = .
	}
	foreach yr in 1997 2003 2007 {
		forvalues w = 0/3 {
			local wvar : word `=`w'+1' of `weightvars'
			sum `score' [aweight = `wvar'] if year==`yr' & confidence_sample == 1
			* years with no observations stay missing
			if `r(N)'>0 {
				replace std_pcg`stub'_w`w' = (`score' - `r(mean)')/`r(sd)' if year==`yr'
			}
		}
	}
}

rename pcgh_sosadcouldntbecheered pcgh_sosadnocheer

* z-score the six caregiver distress items within year, using the weighted
* confidence-sample mean and sd; output is pcgh_std_<item>_w0 ... _w3
foreach item in pcgh_nervous pcgh_hopeless pcgh_restless pcgh_everythinganeffort ///
	pcgh_sosadnocheer pcgh_worthless {
	* the stub keeps the leading underscore, e.g. "_nervous"
	local stub = subinstr("`item'", "pcgh", "" ,  .)
	forvalues w = 0/3 {
		gen pcgh_std`stub'_w`w' = .
	}
	foreach yr in 1997 2003 2007 {
		forvalues w = 0/3 {
			local wvar : word `=`w'+1' of `weightvars'
			sum `item' [aweight = `wvar'] if year==`yr' & confidence_sample == 1
			* years with no observations stay missing
			if `r(N)' >0 {
				replace pcgh_std`stub'_w`w' = (`item' - `r(mean)')/`r(sd)' if year==`yr'
			}
		}
	}
}

* Caregiver mental-health index: average of the six standardized items
forvalues w = 0/3 {
	local s _w`w'
	gen std_pcgmentalhealth`s' = (pcgh_std_nervous`s' + pcgh_std_hopeless`s' + pcgh_std_restless`s' ///
		+ pcgh_std_everythinganeffort`s' + pcgh_std_sosadnocheer`s' + pcgh_std_worthless`s')/6
}
 
*************
* CHILD HEALTH
* standardized by age group and year 

* Caregiver-rated child health, z-scored within year using the weighted
* confidence-sample mean and sd, then sign-flipped so higher is better health.
forvalues w = 0/3 {
	gen std_pcgratedchildhlth_w`w' = .
}
foreach yr in 1997 2003 2007 {
	forvalues w = 0/3 {
		local wvar : word `=`w'+1' of `weightvars'
		sum pcgc_pcgratedchildhlth [aweight = `wvar'] if year==`yr' & confidence_sample == 1
		* years with no observations stay missing
		if `r(N)'>0 {
			replace std_pcgratedchildhlth_w`w' = -1*((pcgc_pcgratedchildhlth - `r(mean)')/`r(sd)') if year==`yr'
		}
	}
}
 
***********************************
* TAS CONFIDENCE VARIABLES *
***********************************		
* standardize by year and over age 21 or not

*General academic confidence variables
* Each item is z-scored within survey year x ta_overage22 group, using the weighted
* confidence-sample mean and sd, once per weighting scheme; the five z-scores are
* then averaged into ta_academic_conf_w0 ... _w3.
	local vars ta_b6bhowgoodatproblemsolving ta_c1chowgoodatlogiccompw_otrs ta_c1ehowintelligentcomparedw_ot ta_c1jhowwelllistencomparedw_oth ta_c1khowgoodatteachingcomparedw
	local varshorts problemsolving logic intelligent listening teaching

	forvalues item = 1/5 {
		local var: word `item' of `vars'
		local shortvar: word `item' of `varshorts'
		forvalues w = 0/3 {
			gen std1_`shortvar'_w`w' = .
		}

		forvalues y = 2005(2)2019 {
			forvalues k = 0/1 {
				forvalues w = 0/3 {
					local weight: word `=`w'+1' of `weightvars'
					sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					* an empty cell has missing r(mean), so its values stay missing
					replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}
	forvalues w = 0/3 {
		local s _w`w'
		gen ta_academic_conf`s' = (std1_problemsolving`s' + std1_logic`s' + std1_intelligent`s' + std1_listening`s' + std1_teaching`s')/5
		lab var ta_academic_conf`s' "Created academic confidence index, not in 2017"
	}
	drop std1_*
	
*General confidence variables
* Ten items, each z-scored within survey year x ta_overage22 group with the weighted
* confidence-sample mean and sd; the index averages them, with "discouraged"
* entering negatively.
	local vars ta_c1ahowgoodatsupervisingcomp ta_c1bhowgoodatleadingcompw_otrs ta_c1fhowindependentcomparedw_ot ta_c1ghowconfidentcomparedw_othe ta_c1hhowdecisivecomparedw_other ta_c2fho_feeldiscouraged ta_m4freqoffeelingcontribtosocie ta_m9freqfeelmanagngdailyrespons ta_m12freqfeelingconfidentofowni ta_m14freqoffeelinglifehaddirect
	local varshorts supervising leading independent confident decisive discouraged contribsoc manageresp confidentideas lifedirection

	forvalues item = 1/10 {
		local var: word `item' of `vars'
		local shortvar: word `item' of `varshorts'
		forvalues w = 0/3 {
			gen std1_`shortvar'_w`w' = .
		}

		forvalues y = 2005(2)2019 {
			forvalues k = 0/1 {
				forvalues w = 0/3 {
					local weight: word `=`w'+1' of `weightvars'
					sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					* an empty cell has missing r(mean), so its values stay missing
					replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}
	forvalues w = 0/3 {
		local s _w`w'
		gen ta_general_conf`s' = (std1_supervising`s' + std1_leading`s' + std1_independent`s' + std1_confident`s' + std1_decisive`s' - std1_discouraged`s' + std1_contribsoc`s' + std1_manageresp`s' + std1_confidentideas`s' + std1_lifedirection`s')/10
		lab var ta_general_conf`s' "Created general confidence index, not in 2017"
	}
	drop std1_*
		
*Career confidence variables
* Three items, each z-scored within survey year x ta_overage22 group with the weighted
* confidence-sample mean and sd; worry about a future job enters the index negatively.
	local vars ta_c2eho_worryabtfuturejob ta_g36howsuccessfulinjobmostlike ta_g37likelihoodofhavingjobmostl
	local varshorts worryabtfuturejob successindreamjob likelyofdreamjob

	forvalues item = 1/3 {
		local var: word `item' of `vars'
		local shortvar: word `item' of `varshorts'
		forvalues w = 0/3 {
			gen std1_`shortvar'_w`w' = .
		}

		forvalues y = 2005(2)2019 {
			forvalues k = 0/1 {
				forvalues w = 0/3 {
					local weight: word `=`w'+1' of `weightvars'
					sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					* an empty cell has missing r(mean), so its values stay missing
					replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}
	forvalues w = 0/3 {
		local s _w`w'
		gen ta_career_conf`s' = (std1_successindreamjob`s' + std1_likelyofdreamjob`s' - std1_worryabtfuturejob`s')/3
		lab var ta_career_conf`s' "Confidence in career prospects, not 2017 and only 24 or younger"
	}
	drop std1_*
	
*Math confidence
* Two items (ta_g40a and ta_g40b), each z-scored within survey year x ta_overage22
* group with the weighted confidence-sample mean and sd, then averaged.
	local mathitems ta_g40ahowgoodinjobrequiringmath ta_g40bhowgoodinjobrequiringtech
	local mathshorts jobgoodatmath jobphysscience

	forvalues w = 0/3 {
		foreach short of local mathshorts {
			gen std1_`short'_w`w' = .
		}
	}
	forvalues y = 2005(2)2019 {
		forvalues k = 0/1 {
			forvalues w = 0/3 {
				local weight: word `=`w'+1' of `weightvars'
				forvalues m = 1/2 {
					local raw : word `m' of `mathitems'
					local short : word `m' of `mathshorts'
					sum `raw' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					* an empty cell has missing r(mean), so its values stay missing
					replace std1_`short'_w`w' = (`raw' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}
	forvalues w = 0/3 {
		local s _w`w'
		gen ta_math_conf`s' = (std1_jobgoodatmath`s' + std1_jobphysscience`s')/2
		lab var ta_math_conf`s' "Math confidence, not in 2017"
	}
	drop std1_*
	
*Reading confidence
* Single item (ta_g40c), z-scored within survey year x ta_overage22 group with the
* weighted confidence-sample mean and sd.
	forvalues w = 0/3 {
		gen ta_reading_conf_w`w' = .
	}
	forvalues y = 2005(2)2019 {
		forvalues k = 0/1 {
			forvalues w = 0/3 {
				local weight: word `=`w'+1' of `weightvars'
				sum ta_g40chowgoodinjobw_lotofread_w [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
				* an empty cell has missing r(mean), so its values stay missing
				replace ta_reading_conf_w`w' = (ta_g40chowgoodinjobw_lotofread_w - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
			}
		}
	}
	forvalues w = 0/3 {
		lab var ta_reading_conf_w`w' "Reading confidence G40c, not in 2017"
	}
	
*Mental health scale
* Six ta_h14 items, each z-scored within survey year x ta_overage22 group with the
* weighted confidence-sample mean and sd, then averaged.
	local vars ta_h14aho_feltnervousinpastmo ta_h14bho_felthopelessinpastmo ta_h14cho_feltrestlessinpastmo ta_h14dho_felteverythingeffort ta_h14eho_felttoosadinpastmo ta_h14fho_feltworthlessinpastmo
	local shortvars honervous hohopeless horestless hoeverythingeffort hotoosad howorthless

	forvalues item = 1/6 {
		local var: word `item' of `vars'
		local shortvar: word `item' of `shortvars'
		forvalues w = 0/3 {
			gen std1_`shortvar'_w`w' = .
		}

		forvalues y = 2005(2)2019 {
			forvalues k = 0/1 {
				forvalues w = 0/3 {
					local weight: word `=`w'+1' of `weightvars'
					sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					* an empty cell has missing r(mean), so its values stay missing
					replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}
	forvalues w = 0/3 {
		local s _w`w'
		gen ta_mentalhealth`s' = (std1_honervous`s' + std1_hohopeless`s' + std1_horestless`s' + std1_hoeverythingeffort`s' + std1_hotoosad`s' + std1_howorthless`s')/6
		lab var ta_mentalhealth`s' "Created mental health index"
	}
	drop std1_*
	
	
*Social anxiety scale
* Four items, each z-scored within survey year x ta_overage22 group with the weighted
* confidence-sample mean and sd. The 2005-2015 waves use the ta_c2* variables and the
* 2017-2019 waves use the ta_b26* variables; both fill the same std1_ variables.
	local vars ta_c2aho_nervousmeetngothers ta_c2bho_feelshy ta_c2cho_feelselfconscious ta_c2gho_feelnervousperforming
	local shortvars nervmeetingothers feelshy feelselfconscious nervousperforming

	* 2005-2015 waves
	forvalues item = 1/4 {
		local var: word `item' of `vars'
		local shortvar: word `item' of `shortvars'
		forvalues w = 0/3 {
			gen std1_`shortvar'_w`w' = .
		}

		forvalues y = 2005(2)2015 {
			forvalues k = 0/1 {
				forvalues w = 0/3 {
					local weight: word `=`w'+1' of `weightvars'
					sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					* an empty cell has missing r(mean), so its values stay missing
					replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}

	local vars2017 ta_b26aho_nervousmeetingothers ta_b26bho_feelshy ta_b26cho_feelselfconscious ta_b26dho_feelnervousperforming
	local shortvars2017 nervmeetingothers feelshy feelselfconscious nervousperforming

	* 2017-2019 waves
	forvalues item = 1/4 {
		local var: word `item' of `vars2017'
		local shortvar: word `item' of `shortvars2017'

		forvalues y = 2017(2)2019 {
			forvalues k = 0/1 {
				forvalues w = 0/3 {
					local weight: word `=`w'+1' of `weightvars'
					sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
					replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
				}
			}
		}
	}
	forvalues w = 0/3 {
		local s _w`w'
		gen ta_socanxiety_index`s' = (std1_nervmeetingothers`s' + std1_feelshy`s' + std1_feelselfconscious`s' + std1_nervousperforming`s')/4
		lab var ta_socanxiety_index`s' "Created social anxiety index"
	}
	drop std1_*
	
	
	* Drop the raw TAS items now that the indices are built
	* (one drop command, split across lines by questionnaire section)
	drop ta_b6* ///
		ta_c1ahowgoodatsupervisingcomp ta_c1bhowgoodatleadingcompw_otrs ///
		ta_c1chowgoodatlogiccompw_otrs ta_c1dhowgoodathelpingcompw_otrs ///
		ta_c1ehowintelligentcomparedw_ot ta_c1fhowindependentcomparedw_ot ///
		ta_c1ghowconfidentcomparedw_othe ta_c1hhowdecisivecomparedw_other ///
		ta_c1jhowwelllistencomparedw_oth ta_c1khowgoodatteachingcomparedw ///
		ta_c2aho_nervousmeetngothers ta_c2bho_feelshy ta_c2cho_feelselfconscious ///
		ta_c2dho_worryaboutmoney ta_c2eho_worryabtfuturejob ta_c2fho_feeldiscouraged ///
		ta_c2gho_feelnervousperforming ///
		ta_g36howsuccessfulinjobmostlike ta_g37likelihoodofhavingjobmostl ta_g40* ///
		ta_h14* ///
		ta_m*freq*feel* ta_m1frequencyofhappinessinlastm ta_m2freqofinterestinlifeinlastm ///
		ta_b27*   ta_b26*
	
***********************************
* BIG 5 PERSONALITY TRAITS *
***********************************	
	
	* Three TAS items per Big-5 trait, with the short names used for the
	* standardized versions (std1_<short>_w0 ... _w3)
	local big5_cons ta_m15freqsomeonewhodoesthorough ta_m21freqsomeonewhoislazy ta_m25freqsomeonewhodoesthngseff
	local big5_cons_short thorough lazy efficient

	local big5_agree ta_m17freqsomeonewhoisrude ta_m20freqsomeonewhohasforgivngn ta_m27freqsomeonewhoisconsiderat
	local big5_agree_short rude forgive kind

	local big5_neur ta_m19freqsomeonewhoworriesalot ta_m24freqsomeonewhogetsnervouse ta_m29freqsomeonewhoisrelaxed
	local big5_neur_short worries nervous relaxed

	local big5_open ta_m18freqsomeonewhoisoriginal ta_m23freqsomeonewhovaluesart ta_m28freqsomeonewhohasactivimag
	local big5_open_short original art image

	local big5_ext ta_m16freqsomeonewhoistalkative ta_m22freqsomeonewhoisoutgoing_s ta_m26freqsomeonewhoisreserved
	local big5_ext_short talk outgoing reserved

	foreach type in cons agree neur open ext {
		forvalues item = 1/3 {
			local var: word `item' of `big5_`type''
			local shortvar: word `item' of `big5_`type'_short'
			forvalues w = 0/3 {
				gen std1_`shortvar'_w`w' = .
			}

			*Note that coding on the thorough variables, etc. is different in 2019, so need to standardize by year
			forvalues y = 2005(2)2019 {
				forvalues k = 0/1 {
					forvalues w = 0/3 {
						local weight: word `=`w'+1' of `weightvars'
						sum `var' [aweight = `weight'] if year == `y' & ta_overage22 == `k'  & confidence_sample == 1
						* an empty cell has missing r(mean), so its values stay missing
						replace std1_`shortvar'_w`w' = (`var' - r(mean))/r(sd) if year == `y' & ta_overage22 == `k'
					}
				}
			}
		}
	}
	foreach weight in "_w0" "_w1" "_w2" "_w3" { 
		gen ta_big5_cons`weight' = (std1_thorough`weight' - std1_lazy`weight' + std1_efficient`weight') /3 
			lab var ta_big5_cons`weight' "Stnd index of conscientiousness"
		gen ta_big5_agree`weight' = (std1_forgive`weight' + std1_kind`weight' - std1_rude`weight') /3 
			lab var ta_big5_agree`weight' "Stnd index of agreeableness"
		gen ta_big5_neur`weight' = (std1_worries`weight' + std1_nervous`weight' - std1_relaxed`weight') /3
			lab var ta_big5_neur`weight' "Stnd index of neuroticism"
		gen ta_big5_open`weight' = (std1_original`weight' + std1_art`weight' + std1_image`weight') /3
			lab var ta_big5_open`weight' "Stnd index of openness to experiences"
		gen ta_big5_ext`weight' = (std1_talk`weight' + std1_outgoing`weight' - std1_reserved`weight')/3
			lab var ta_big5_ext`weight' "Stnd index of extroversion"
	}
		drop std1_*
		drop ta_m*freq*
	
***********************************
* TAS DANGEROUS BEHAVIOR *
***********************************	

	* Raw risky-behavior items and the stubs used for their standardized versions
	local vars ta_k11ho_didsomethingdangerous ta_k12ho_damagedpublicproperty ta_k13ho_gotintophysicalfight ta_k15ho_drovewhendrunkorhigh ta_k16ho_rodewithdrunkdriver 
	local varshorts diddangerous damagedpublicprop gotintophysfight drovedrunkorhigh rodedrunkdriver 
	
	forvalues item = 1/5 {
		local rawvar : word `item' of `vars'
		local stub : word `item' of `varshorts'
		
		* one empty z-score variable per weight, filled cell by cell below
		forvalues w = 0/3 {
			gen std1_`stub'_w`w' = .
		}
		
		* standardize within each wave x ta_overage22 cell under each weight
		forvalues wave = 2005(2)2019 {
			forvalues grp = 0/1 {
				local cell year == `wave' & ta_overage22 == `grp'
				forvalues w = 0/3 {
					local wpos = `w' + 1
					local wgt : word `wpos' of `weightvars'
					tempvar zscore
					* mean and SD from the confidence sample only; z-score assigned to the whole cell
					summarize `rawvar' [aweight = `wgt'] if `cell' & confidence_sample == 1
					gen `zscore' = (`rawvar' - r(mean))/r(sd) if `cell'
					replace std1_`stub'_w`w' = `zscore' if `cell'
					drop `zscore'
				}
			}
		}
	}
	
	* Index: average of the five standardized items, one index per weight
	forvalues w = 0/3 {
		gen ta_dangerous_behavior_index_w`w' = (std1_diddangerous_w`w' + std1_damagedpublicprop_w`w' +  std1_gotintophysfight_w`w' + std1_drovedrunkorhigh_w`w' + std1_rodedrunkdriver_w`w' )/5
		lab var ta_dangerous_behavior_index_w`w' "Dangerous behavior index"
	}
	
	* raw items and intermediate z-scores are no longer needed
	drop `vars' std1_*


	
*************
* CONTINUOUS MEASURES OF CONFIDENCE (THAT REQUIRE WEIGHTING)
*
* For each subject (math, read) and each of the four confidence-sample weights this
* section builds three versions of "reported minus predicted" confidence:
*   option 1 (prd_/rsd_)     predicted rating assigned so that, within an age bucket, the
*                            test-score distribution is cut at the same cumulative shares
*                            as the reported 1-7 ratings
*   option 2 (prd_u_/rsd_u_) predicted rating from seven equal-width test-percentile bands
*   option 3 (rsd_p_)        within-bucket percentile of the rating minus within-bucket
*                            percentile of the test score
* and then the person-level mean (ave_) of the option 1 (7-point), option 2 and option 3
* residuals.
 
local weightvars weight_confsamp0 weight_confsamp1 weight_confsamp2 weight_confsamp3 

foreach subj in math read {
* test-score stub used in the percentile variable name: ap for math, total for reading
if "`subj'"=="math" local test "ap"
if "`subj'"=="read" local test "total"
    
    local var `subj'cnf_ratesk 
	local newstub ratesk
	local oldstub rateownskill
	* 5-point version of the 1-7 rating: 1-3 collapse to 1, 4-7 map to 2-5
	gen `var'5 = `var'-2 if inrange(`var', 4, 7)
	replace `var'5 = 1 if inrange(`var', 1, 3)

	foreach weight in `weightvars' {
		* suffix that tags every variable built with this weight
		if "`weight'"=="weight_confsamp0" local w = "_w0"
		if "`weight'"=="weight_confsamp1" local w = "_w1"
		if "`weight'"=="weight_confsamp2" local w = "_w2"
		if "`weight'"=="weight_confsamp3" local w = "_w3"

		* option 1: predicted confidence based on test score bucket level and take diff assuming true beliefs match distribution of reported
		gen rsd_`subj'cnf_`newstub'7`w' = .
		gen rsd_`subj'cnf_`newstub'5`w' = .
		gen prd_`subj'cnf_`newstub'7`w' = .
		gen prd_`subj'cnf_`newstub'5`w' = .
		forval bucket = 1/4 {
			* lower bound of the first score band in this bucket
			local lasttopscore = 0
			forval i = 1/7 {
				count if age_meas_conf_bucket==`bucket' & !mi(`var') & !mi(chld_`subj'_`test'_pctile)
				if `r(N)'>0 {
					* temp is nonmissing only inside the bucket sample, so the unrestricted
					* summarize gives the weighted share of that sample rating at or below i
					gen temp = `var'<=`i' if age_meas_conf_bucket==`bucket' & !mi(`var') & !mi(chld_`subj'_`test'_pctile)
					sum temp [aweight = `weight']
					* share expressed in thousandths, capped at 999 because n(1000) returns r(r1) to r(r999)
					* NOTE(review): if the share rounds to 0 there is no r(r0) to read - confirm this cannot occur in the data
					local pctile = round(1000*`r(mean)')
					if `pctile'>=1000 local pctile 999
					* test score at that cumulative share within the bucket
					_pctile chld_`subj'_`test'_pctile if age_meas_conf_bucket==`bucket' & !mi(chld_`subj'_`test'_pctile) [weight = `weight'], n(1000)
					local topscore = `r(r`pctile')'
					* the band below uses a strict upper bound, so a cutoff of 99 is raised to 100 to keep scores of 99 in the band
					if `topscore'==99 local topscore 100
					* predicted rating i for test scores in [lasttopscore, topscore)
					replace prd_`subj'cnf_`newstub'7`w' = `i' if age_meas_conf_bucket==`bucket' & chld_`subj'_`test'_pctile<`topscore'	& chld_`subj'_`test'_pctile>=`lasttopscore'
					drop temp
					local lasttopscore = `topscore'
				}
			}
		} 
		* 5-point prediction uses the same 7-to-5 mapping as the reported rating; residual is reported minus predicted
		replace prd_`subj'cnf_`newstub'5`w' =  prd_`subj'cnf_`newstub'7`w'-2 if inrange(prd_`subj'cnf_`newstub'7`w', 4, 7)
		replace prd_`subj'cnf_`newstub'5`w' =  1 if inrange(prd_`subj'cnf_`newstub'7`w', 1, 3)
		replace rsd_`subj'cnf_`newstub'7`w' = `var' - prd_`subj'cnf_`newstub'7`w'
		replace rsd_`subj'cnf_`newstub'5`w' = `var'5 - prd_`subj'cnf_`newstub'5`w'		
	

		* option 2: predicted confidence assuming uniform distribution and take difference
		* (no age-bucket condition here; the weight is not used in this option, the variables only carry its suffix)
		gen rsd_u_`subj'cnf_`newstub'7`w'  = .
		gen prd_u_`subj'cnf_`newstub'7`w'  = .
		gen rsd_u_ave_`subj'cnf_`newstub'7`w'  = .
		gen prd_u_ave_`subj'cnf_`newstub'7`w'  = .
		local lasttopscore = 0
		forval i = 1/7 {
			local j = `i'*(100/7) // assuming a uniform distribution
				* predicted rating i for percentiles in [lasttopscore, j), for the wave-level and the chld_ave_ percentile
				replace prd_u_`subj'cnf_`newstub'7`w' = `i' if chld_`subj'_`test'_pctile<`j' & chld_`subj'_`test'_pctile>=`lasttopscore'
				replace prd_u_ave_`subj'cnf_`newstub'7`w' = `i' if  chld_ave_`subj'_`test'_pctile<`j' & chld_ave_`subj'_`test'_pctile>=`lasttopscore'
				local lasttopscore = `j'
		}
		replace rsd_u_`subj'cnf_`newstub'7`w' = `var' - prd_u_`subj'cnf_`newstub'7`w'
		replace rsd_u_ave_`subj'cnf_`newstub'7`w' = chld_ave_`subj'cnf_`oldstub' - prd_u_ave_`subj'cnf_`newstub'7`w'


		* option 3:  difference between pctile of test score and pctile of confidence (in our sample)
		gen samplepct_`subj'cnf_`newstub'`w' = .
		gen samplepct_`subj'test`w' = .
		forval i = 1/4 {
			* i indexes the age bucket here; buckets with no usable observations are skipped
			count if age_meas_conf_bucket==`i' & !mi(`var') & !mi(chld_`subj'_`test'_pctile)
			if `r(N)'>0 {
				* weighted within-bucket quantile group (out of 100) of the reported rating
				xtile temp = `var' [aweight = `weight'] if age_meas_conf_bucket==`i' & !mi(`var') & !mi(chld_`subj'_`test'_pctile), n(100)
				replace samplepct_`subj'cnf_`newstub'`w' = temp if age_meas_conf_bucket==`i' & !mi(`var') & !mi(chld_`subj'_`test'_pctile)
				drop temp
				
				* same for the test-score percentile, on the same observations
				xtile temp = chld_`subj'_`test'_pctile [aweight = `weight'] if age_meas_conf_bucket==`i' & !mi(`var') & !mi(chld_`subj'_`test'_pctile), n(100)
				replace samplepct_`subj'test`w' = temp if age_meas_conf_bucket==`i' & !mi(`var') & !mi(chld_`subj'_`test'_pctile)
				drop temp
				
				
			}
		}

		gen rsd_p_`subj'cnf_`newstub'`w' = samplepct_`subj'cnf_`newstub'`w' - samplepct_`subj'test`w'
		
		* person-level mean of each residual measure across all of that uid's observations
		foreach cnf in rsd_`subj'cnf_`newstub'7`w' rsd_u_`subj'cnf_`newstub'7`w' rsd_p_`subj'cnf_`newstub'`w' {
			bys uid: egen ave_`cnf' = mean(`cnf')
		}
    }
} 

* the raw 1-7 ratings get an explicit 7 suffix, parallel to the 5-point versions created above
rename mathcnf_ratesk mathcnf_ratesk7
rename readcnf_ratesk readcnf_ratesk7

********************************************************************************
// CLEAN UP CONTROLS THAT NEED TO CORRESPOND TO FIRST AND SECOND OBSERVATION & 
// ALSO REQUIRED STANDARDIZATION
//
// For each listed variable this builds person-constant copies taken from the year
// stored in fst_year_has_conf and/or sec_year_has_conf:
//   fst copies end up named c_fst_VAR (plus r_fst_VAR and m_fst_VAR when some are missing)
//   sec copies stay named sec_VAR     (plus r_sec_VAR and m_sec_VAR when some are missing)
// r_ is the value with missings set to 0 and m_ is the missing indicator.
//
// NOTE(review): prefixlist and rename_fst are switched when specific variable names are
// reached, so the result depends on the order in which the varlist below expands.
// Variables before std_mathcnf_ratesk_w0 get both fst and sec copies, from that variable
// on only sec, from tch_std_acadcomp_w0 on only fst, and from std_pcggendernorms_w0 on
// the fst copies are additionally renamed to drop the fst_ part (c_VAR, r_VAR, m_VAR).

local prefixlist "fst sec"
local rename_fst = 0
foreach var of varlist std_gencnf_overall* ///
	rsd_*cnf_ratesk* prd_*cnf_ratesk* ///
	std_mathcnf_ratesk* std_readcnf_ratesk* ///
	tch_std_acadcomp* tch_std_soccomp* tch_std_physcomp* ///
	 chld_big5* std_pcggendernorms* std_pcgmentalhealth* std_pcgselfesteem* ///
	 std_pcgselfeffic* std_pcgparentingagg* std_pcgratedchildhlth* ///
	{
    
	* skip any matched variable whose name contains _ave_ or begins with _ave
	if strpos("`var'", "_ave_")==0 & strpos("`var'", "_ave")!=1 {
    * state switches keyed on the first variable of each group (see note above)
    if "`var'"=="std_mathcnf_ratesk_w0" local prefixlist "sec"
    if "`var'"=="tch_std_acadcomp_w0" local prefixlist "fst"
    if "`var'"=="std_pcggendernorms_w0" local rename_fst = 1

    foreach prefix in `prefixlist' {
        * value in the reference year; for fst, fall back to the 1997 value when
        * fst_year_has_conf is missing
        gen temp = `var' if year==`prefix'_year_has_conf
		if "`prefix'"=="fst" replace temp = `var' if year==1997 & mi(fst_year_has_conf)
        * spread that value to every row of the person
        bys uid: egen `prefix'_`var' = max(temp)
        drop temp
		gen m_`prefix'_`var' = mi(`prefix'_`var')
		sum m_`prefix'_`var'

        * never missing: the indicator is dropped and only the c_ copy would be renamed
        if `r(mean)'==0 & `r(sd)'==0 {
	    drop m_`prefix'_`var'
	    local rename "rename (c_`prefix'_`var') (c_`var')"
	}
        * sometimes missing: keep the indicator and add a zero-filled copy
        else {
            gen r_`prefix'_`var' = `prefix'_`var'
            replace r_`prefix'_`var' = 0 if mi(`prefix'_`var')
			local rename "rename (c_`prefix'_`var' r_`prefix'_`var' m_`prefix'_`var') (c_`var' r_`var' m_`var')"
        }
	* only fst copies get the c_ prefix; the stored rename command runs only once rename_fst is on
	if "`prefix'"=="fst" rename `prefix'_`var' c_`prefix'_`var'
	if `rename_fst'==1 `rename'
    }     
	}
} 

* recodes of some outcome variables for mediation analysis
*
* prev_mean_X: for each person-year, the mean of X over all of that person's
* observations with year <= the current year (the current wave is included).
foreach var of varlist ta_big5* ta_general_conf_w* ta_academic_conf_w* ta_career_conf_w* ///
	ta_math_conf_w* ta_reading_conf_w*  {
	gen prev_mean_`var' = .
	forval year = 2003/2019 {
		* running mean through this year, copied only into the rows of this year
		bys uid: egen mean_`var'_`year' = mean(`var') if year<=`year'
		replace prev_mean_`var' = mean_`var'_`year' if year==`year'
		* drop the helper by its full name; the original referenced an undefined macro
		* here and only worked through variable-name abbreviation
		drop mean_`var'_`year'
	}
}
* prev_ever_X: for each person-year, the maximum of X over all of that person's
* observations with year <= the current year (the current wave is included).
foreach outcome in occ_stem_self occ_stem_health_self occ_health_self occ_othhiskll_self {
	gen prev_ever_`outcome' = .
	forvalues yr = 2003/2019 {
		tempvar sofar
		bysort uid: egen `sofar' = max(`outcome') if year<=`yr'
		replace prev_ever_`outcome' = `sofar' if year==`yr'
		drop `sofar'
	}
}

* For each mediator: r_ copy with missings set to 0, and m_ flag marking the missings
foreach mediator of varlist prev_mean_ta_* prev_ever_occ_stem_self prev_ever_occ_stem_health_self ///
	prev_ever_occ_health_self prev_ever_occ_othhiskll_self ///
	ever_atleast_grad_hs_self ever_atleast_grad_bach_self ever_atleast_grad_grad_self ///
	stemmajor stemhlthmajor {
	gen r_`mediator' = cond(mi(`mediator'), 0, `mediator')
	gen m_`mediator' = mi(`mediator')
}



* ability deciles
* Ten indicator variables per subject for the band of the test-score percentile:
* decile d covers (10*(d-1), 10*d], except that the first band starts just above -1
* so that a percentile of 0 falls in decile 1.
foreach subj in math read {
	local test = cond("`subj'"=="math", "ap", "total")
	
	forvalues d = 1/10 {
		local hi = 10*`d'
		local lo = cond(`d'==1, -1, `hi'-10)
		
		* first observation: missing where the first-observation percentile is missing
		local fstpct c_fst_chld_`subj'_`test'_pctile
		gen c_fst_`subj'_decile`d' = `fstpct' > `lo' & `fstpct' <= `hi' if !mi(`fstpct')
		if "`subj'"=="read" {
			* zero-filled copy, created for reading only
			gen r_fst_`subj'_decile`d' = cond(mi(c_fst_`subj'_decile`d'), 0, c_fst_`subj'_decile`d')
		}
		
		* second observation: defined only where the missing flag is 0, plus a zero-filled copy
		local secpct r_sec_chld_`subj'_`test'_pctile
		gen sec_`subj'_decile`d' = `secpct' > `lo' & `secpct'<=`hi' if m_sec_chld_`subj'_`test'_pctile==0
		gen r_sec_`subj'_decile`d' = cond(mi(sec_`subj'_decile`d'), 0, sec_`subj'_decile`d')
	}
}


* standardizing the continuous confidence measures
* Each first-observation residual measure is z-scored across confidence-sample rows
* taken in the year of first measurement, and the z-score is then spread to every
* row of the person as c_fst_z_<measure>.
foreach subj in math read {
	forvalues w = 0/3 {
		local measures rsd_p_`subj'cnf_ratesk_w`w' rsd_u_`subj'cnf_ratesk7_w`w' rsd_`subj'cnf_ratesk7_w`w'
		foreach measure of local measures {
			tempvar firstval zval
			gen `firstval' = c_fst_`measure' if year==fst_year_has_conf
			egen `zval' = std(`firstval') if confidence_sample==1
			bysort uid: egen c_fst_z_`measure' = max(`zval')
			drop `firstval' `zval'
		}
	}
}
********************************************************************************
// Age-adjusted average panel outcomes and average around age 30
//
// Per outcome and person:
//   X_aa          mean residual from a regression of X on age dummies
//   X_aa_over25   the same mean using only rows with age>25
//   X_ave_26_36   raw mean of X over ages 26-36
//   X_ave_28_33   raw mean of X over ages 28-33
// The restricted means are first computed on the qualifying rows only and then
// copied to all rows of the person with ereplace ... = min().

foreach outcome in occ_stem_health_self occ_stem_self occ_othhiskll_self ln_earnings_self unemp_self {
	tempvar ageresid
	regress `outcome' i.age
	predict `ageresid', residuals
	bysort uid: egen `outcome'_aa = mean(`ageresid')
	bysort uid: egen `outcome'_aa_over25 = mean(`ageresid') if age>25
	bysort uid: ereplace `outcome'_aa_over25 = min(`outcome'_aa_over25)
	drop `ageresid'
	
	bysort uid: egen `outcome'_ave_26_36 = mean(`outcome') if inrange(age, 26, 36)
	bysort uid: egen `outcome'_ave_28_33 = mean(`outcome') if inrange(age, 28, 33)
	foreach window in 26_36 28_33 {
		bysort uid: ereplace `outcome'_ave_`window' = min(`outcome'_ave_`window')
	}
}


********************************************************************************
// CREATING THE FULL SAMPLE

* Snapshot of the data before the sample restriction below
* (will drop if you don't have math or reading test scores, save this to make full dataset)
tempfile hold
save `hold'

********************************************************************************
// NOW KEEPING ONLY THE CONFIDENCE SAMPLE

keep if confidence_sample==1
* sanity check: nobody left in the sample may have the first-observation flag set
assert m_fst_math_overcnf_orig==0

********************************************************************************
// OTHER FINAL CLEANING

* find sample with sibling in the sample:
* family size is counted among 1997 rows sharing famid1997, spread to all rows of
* the person, and reduced by one so that it counts the other family members only
tempvar famsize
bysort famid1997 year: gen `famsize' = _N if year==1997
bysort uid: egen sibsinsample = max(`famsize')
replace sibsinsample = sibsinsample-1
drop `famsize'

gen c_fst_age_meas_conf_round = round(c_fst_age_meas_conf)



********************************************************************************
// START WRITING OUT DATA

#delimit ;
 
/* Everything between here and "#delimit cr" is in semicolon-delimited mode, so   */
/* every command must end with a semicolon. The macros below are varlist patterns */
/* naming the groups of variables kept in the analysis files.                     */

local idvars uid year hh_interviewnum famid1997 confidence_sample* *sibsinsample ch97prwt ch02prwt ch07prwt weight_confsamp* num_hasconfandscore* sampling_* ;

local rhsconfidencevars  ?_???_std_gencnf_overall* c_fst_*cnf* c_*pctilea* c_*pctileb* c_*rcnf* *_ave_* ave_* c_*_rsd_*cnf_rate* c_fst_*decile* ;

local correlates ?_*mathcnf_* ?_*readcnf_* 
	?_*acadcomp* ?_*physcomp* ?_*soccomp* ?_*_ability* max_*exp* ?_*pcgc_childnumfriends* 
	?_*chld_feelpartofschool* ?_*chld_feelclosetopeers* ?_*pcgc_expectededuc* ?_*chld_expectededuc* ;

local demcontrols ?_female ?_chld_white ?_chld_black ?_chld_hisp ?_state
	?_birthquarter ?_birthyear age ?_birthorder c_fst_age_meas_conf_round;

local parentcontrols ?_pp_read ?_pp_art ?_pp_sports ?_pp_homework ?_pp_games ?_pp_physaffect
	?_pp_saidily ?_everspanked ?_brstfd ?_fatherinhh ?_twoadulthh ?_pp_appreciateeveryday 
	?_mostimp_obey ?_mostimp_thinkforself ?_mostimp_workhard ?_mostimp_helpothers 
	?_std_pcggendernorms* ?_std_pcgmentalhealth* ?_std_pcgselfesteem* ?_std_pcgselfeffic* 
	?_std_pcgparentingagg* ?_mainmealtogethgeq6days ?_*_pred_final_educ ;

local othercontrols ?_fst_*pctile_dv ?_fst_*pctile ?_fst_*decile* ?_fst_*age* fst_*year* 
	?_fst_evergifted* ?_fst_eversped* ?_fst_everrepeatedgrade* ?_fst_frpl* ?_schpctwhite 
	?_schpctblack ?_schpcthispanic ?_schpctfrpl ?_schppe ?_schstutchratio ?_std_pcgratedchildhlth* 
	?_*numsibsinhh* ?_recgovtransfers ?_recssi ?_publichousing 
	?_recfoodstamps ?_familytotalincome2016d ?_familytaxableincome2016d* ?_atleast_grad_*dad ?_atleast_grad_*mom 
	?_occ_*_mom ?_occ_*dad ?_childofheads ?_agewhenborn_dad ?_agewhenborn_mom ?_fst_chld_big5* 
	?_fst_chld_digitspantotal_raw ?_sec_chld_digitspantotal_raw ?_sec_*pctile_dv 
	?_tch_yearsatschool ?_*acadcomp* ?_*physcomp* ?_*soccomp*;
	
local latewidecontrols r_sec_* m_sec_*;

local latepanelcontrols r_sec_* m_sec_* r_prev_mean_ta_* m_prev_mean_ta_* r_prev_ever_occ_* m_prev_ever_occ_* r_ever_atleast_* m_ever_atleast_* r_stemmajor m_stemmajor r_stemhlthmajor m_stemhlthmajor;

local lhsconfidencevars sec_*cnf* sec_*pctile ta_academic_conf* ta_general_conf* ta_career_conf* ta_math_conf* ta_reading_conf* ta_big5*;

local lhsemplvars unemp_self ln_earnings_self occ_stem_self occ_stem_health_self occ_health_self occ_othhiskll_self urban_bigcentral_self 
urban_bigplusfringe_self *_aa* *_ave_26_36 *_ave_28_33;

local lhseducvars ever_atleast*  stemmajor stemhlthmajor; 

local lhsfamilyvars evermarried everdivorced evermarriedcohabit evermarried_neverdiv 
	max_num_child agefirstchild  isheadorwife ta_current_rom_relationship
	ta_satis_romantic;

local lhsothervars everarrested everjailed everhadpsychdiag ta_mentalhealth* ta_socanxiety_index*
	  ta_drink_alc_often ta_binge_drink_alc
	ta_smoke_cig_regularly ta_dangerous_behavior_index*;


/* Outcomes that vary by year: kept in the panel file */
local paneloutvars unemp_self ln_earnings_self occ_stem_self occ_othhiskll_self occ_stem_health_self occ_health_self
	ta_mentalhealth* ta_socanxiety_index* 
	 ta_drink_alc_often ta_binge_drink_alc ta_smoke_cig_regularly
	 ta_dangerous_behavior_index* ta_current_rom_relationship ta_satis_romantic
	isheadorwife ta_academic_conf* ta_general_conf* ta_career_conf* 
	ta_math_conf* ta_reading_conf* ta_big5* urban_bigcentral_self urban_bigplusfringe_self ;

/* Outcomes that are constant within person: kept in the one-row-per-person file */
local wideoutvars ever_atleast* evermarried everdivorced evermarriedcohabit
	evermarried_neverdiv max_num_child  ever_atleast*  stemmajor stemhlthmajor agefirstchild
	everarrested everjailed everhadpsychdiag
	?_*mathcnf_* ?_*readcnf_*
	 ?_*_ability* max_*exp* ?_*pcgc_childnumfriends* 
	?_*chld_feelpartofschool* ?_*chld_feelclosetopeers* ?_*pcgc_expectededuc* ?_*chld_expectededuc* sec_* *_aa* *_ave_26_36 *_ave_28_33; 

/* NOTE(review): the macros instruments and latecontrols are not defined in this     */
/* section. If they are not defined earlier in the file they expand to nothing here; */
/* latecontrols may have been meant to be latepanelcontrols plus latewidecontrols.   */
keep `idvars' `rhsconfidencevars' `correlates' `demcontrols' `parentcontrols' 
	`othercontrols' `instruments' `lhsconfidencevars' `lhsemplvars' `lhseducvars'
	`lhsfamilyvars' `lhsothervars' `latepanelcontrols' `latewidecontrols' ; 
	
order `idvars' `lhsconfidencevars' `lhsemplvars' `lhseducvars' `lhsfamilyvars' 
	`lhsothervars' `rhsconfidencevars' `instruments'  `demcontrols' `parentcontrols' 
	`othercontrols'  `correlates' `latecontrols' ;


/* Snapshot of the trimmed data; reloaded later to build the panel file.         */
/* Both commands are now terminated explicitly: in this mode a line break does   */
/* not end a command, and the original left them without terminators.            */
tempfile temp_data ;
save `temp_data' ;

#delimit cr
* ------------------------------------------------------------------------------
* One-row-per-person file: keep identifiers, controls and person-constant outcomes,
* drop the year-varying identifiers, collapse to unique rows and check that each
* uid appears exactly once.
* ------------------------------------------------------------------------------
keep `idvars'  `demcontrols' `parentcontrols' `othercontrols' `instruments' `wideoutvars' `rhsconfidencevars' `latewidecontrols'
drop age hh_interviewnum year

duplicates drop
tempvar rows_per_uid
bysort uid: gen `rows_per_uid' = _N
assert `rows_per_uid'==1
drop `rows_per_uid'
* year was dropped above; every row gets the constant 8888 instead
gen year = 8888


* Label outcome variables for table descriptions

forvalues w = 0/3 {
	label var sec_std_gencnf_overall_w`w' "Later childhood general confidence"
}
label var sec_chld_math_ap_pctile "Adolescent math scores"
label var sec_chld_read_total_pctile "Adolescent reading scores"
label var sec_math_undrcnf_orig "Math adolescent under-confidence"
label var sec_math_overcnf_orig "Math adolescent over-confidence"
label var sec_read_undrcnf_orig "Reading adolescent under-confidence"
label var sec_read_overcnf_orig "Reading adolescent over-confidence"
label var ever_atleast_grad_hs_self "High school degree"
label var ever_atleast_grad_bach_self "College degree"
label var ever_atleast_grad_grad_self "Graduate degree"
label var stemmajor "STEM major"
label var stemhlthmajor "STEM major (with health)"
label var evermarried "Ever married"
label var everdivorced  "Ever divorced"
label var evermarriedcohabit "Ever cohabitated with partner"
label var evermarried_neverdiv "Ever married, never divorced"
label var max_num_child "Number of children"
label var agefirstchild "Age at first child"
label var everarrested "Ever arrested"
label var everjailed "Ever jailed"
label var everhadpsychdiag "Ever had psych diagnosis"

* The four person-level summaries of each employment outcome share one label
foreach version in aa aa_over25 ave_26_36 ave_28_33 {
	label var ln_earnings_self_`version' "Ln(Earnings)"
	label var unemp_self_`version' "Unemployed this year"
	label var occ_othhiskll_self_`version' "Non-STEM high-educ occ."
	label var occ_stem_self_`version' "Works in STEM (non-health)"
	label var occ_stem_health_self_`version' "Works in STEM"
}

* Indicators for the predicted final education category (4, 3, 2) by parent and teacher
foreach adult in parent tch {
	local code = 4
	foreach level in postbach bach hsgrad {
		gen r_fst_`adult'_pred_`level' = r_fst_`adult'_pred_final_educ==`code'
		local --code
	}
}

* Write to the clean-data folder defined at the top of the file. The original path
* appended "replication materials/clean" to path, which already ends in the
* replication materials folder, so it pointed to a doubled directory.
save "`CLEAN'/CDS_TAS_PSID_analysis_1year.dta", replace

* ------------------------------------------------------------------------------
* Panel (person-year) file: reload the trimmed snapshot and keep identifiers,
* controls and the year-varying outcomes.
* ------------------------------------------------------------------------------
use `temp_data', clear

keep `idvars'  `demcontrols' `parentcontrols' `othercontrols' `instruments' `paneloutvars' `rhsconfidencevars'  `latepanelcontrols'

* Label outcome variables for table descriptions

label var unemp_self "Ever unemployed this year"
label var ln_earnings_self "Log Earnings"
label var occ_stem_self "Works in a STEM occupation"
label var occ_stem_health_self "Works in a STEM occupation (with health)"
label var occ_health_self "Works in a healthcare occupation"
label var occ_othhiskll_self "Works in non-STEM high-skill occupation"

label var ta_drink_alc_often "Drinks alcohol often" 
label var ta_binge_drink_alc "Binge drinks alcohol"
label var ta_smoke_cig_regularly "Smokes cigarettes regularly"
label var ta_current_rom_relationship "In a romantic relationship"
label var ta_satis_romantic "Satisfied with romantic relationship"
label var isheadorwife "Is \emph{Head} or \emph{Wife}"
label var urban_bigcentral_self "Lives in MSA with pop>1 mil"
label var urban_bigplusfringe_self "Lives in MSA with pop>1 mil"

forvalues w = 0/3 {
	label var ta_academic_conf_w`w' "Academic confidence" 
	label var ta_general_conf_w`w' "General confidence"
	label var ta_career_conf_w`w'  "Career confidence"
	label var ta_math_conf_w`w' "Math confidence"
	label var ta_reading_conf_w`w' "Reading confidence"
	label var ta_mentalhealth_w`w'  "Mental health"
	label var ta_socanxiety_index_w`w' "Social anxiety"
	label var ta_dangerous_behavior_index_w`w' "Dangerous behavior"
	label var ta_big5_cons_w`w' "Conscientiousness"
	label var ta_big5_agree_w`w' "Agreeableness"
	label var ta_big5_neur_w`w' "Neuroticism"
	label var ta_big5_open_w`w' "Openness"
	label var ta_big5_ext_w`w' "Extroversion"
}

* Outcomes restricted to person-years at age 26 or older. Stata treats a missing
* value as larger than any number, so age>=26 alone is also true when age is
* missing; the explicit non-missing check keeps such rows out of the restricted
* outcomes.
local over25 age>=26 & !mi(age)

gen ln_earnings_self_over25 = ln_earnings_self if `over25'
label var ln_earnings_self_over25 "Ln(Earnings)"

gen unemp_self_over25 = unemp_self if `over25'
label var unemp_self_over25 "Unemployed this year"

gen urban_bigcentral_self_over25 =  urban_bigcentral_self if `over25'
label var urban_bigcentral_self_over25 "Lives in MSA with pop>1 mil (Age>25)"

gen urban_bigplusfringe_self_over25 =  urban_bigplusfringe_self if `over25'
label var urban_bigplusfringe_self_over25 "Lives in MSA with pop>1 mil (Age>25)"

gen occ_othhiskll_self_over25 = occ_othhiskll_self if `over25'
label var occ_othhiskll_self_over25 "Non-STEM high-educ occ."

gen occ_stem_self_over25 = occ_stem_self if `over25'
label var occ_stem_self_over25 "Works in STEM"

gen occ_stem_health_self_over25 = occ_stem_health_self if `over25'
label var occ_stem_health_self_over25 "Works in STEM (with health)"

gen occ_health_self_over25 = occ_health_self if `over25'
label var occ_health_self_over25 "Works in a healthcare occupation (Age>25)"

* Indicators for the predicted final education category (4, 3, 2) by parent and teacher
foreach adult in parent tch {
	gen r_fst_`adult'_pred_postbach = r_fst_`adult'_pred_final_educ==4
	gen r_fst_`adult'_pred_bach = r_fst_`adult'_pred_final_educ==3
	gen r_fst_`adult'_pred_hsgrad = r_fst_`adult'_pred_final_educ==2
}

* Write to the clean-data folder defined at the top of the file. The original path
* appended "replication materials/clean" to path, which already ends in the
* replication materials folder, so it pointed to a doubled directory.
save "`CLEAN'/CDS_TAS_PSID_analysis_panel.dta", replace


